{ "measurement": [ { "key": "model.layers.0.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.02241423726081848, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0222028698772192, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.01109787542372942, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.011040427722036839, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.010972912423312664, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.007411421742290258, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.03593573719263077, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.021966664120554924, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.010989741422235966, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.010947753675282001, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.011111375875771046, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.013361022807657719, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.010929973796010017, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.008160755969583988, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.007386878132820129, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.00821209792047739, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.007379314862191677, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0071045877411961555, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.007378141395747662, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.007102767936885357, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007319041062146425, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007378159556537867, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0070063392631709576, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.007102192845195532, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.01109787542372942, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.01109787542372942, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.02278539538383484, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.02250087819993496, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.010553020052611828, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.010475853458046913, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.010395769961178303, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.00554667878895998, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.03017871454358101, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.022234313189983368, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.010428955778479576, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.01035232376307249, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.01050405390560627, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.011540692299604416, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.010330728255212307, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.006681009195744991, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.005502686370164156, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.006605790928006172, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.005487380549311638, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.00506999809294939, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.005484325811266899, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.005067035555839539, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005165134556591511, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005483102053403854, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004639665130525827, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0050643798895180225, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.010553020052611828, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.010553020052611828, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.06047150865197182, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.050824619829654694, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.04036761447787285, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.034179333597421646, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.02695419453084469, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.019415855407714844, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05134223774075508, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.03880663961172104, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.028571156784892082, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.022343019023537636, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.023570766672492027, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027569742873311043, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01890585385262966, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.013441205024719238, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.011753997765481472, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013848471455276012, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.007592359557747841, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.006811372935771942, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.006787540856748819, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.005784602370113134, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007147612981498241, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006232197396457195, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004075947217643261, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004540584981441498, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.013441205024719238, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.013441205024719238, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.0.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.024277938529849052, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.015320907346904278, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.009084038436412811, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.010144214145839214, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.009667796082794666, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.004402417689561844, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.016014110296964645, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.014296400360763073, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.011365883983671665, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.007315394934266806, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.007685418706387281, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.008113579824566841, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.007107638753950596, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.005203662905842066, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.004650107119232416, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.004285587463527918, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0035259791184216738, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.003365687793120742, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0032343645580112934, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.002971211913973093, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0026867638807743788, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0032128079328686, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0021948949433863163, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.002836678409948945, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.015320907346904278, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, "best_option": { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.015320907346904278, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.056725986301898956, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.0531759038567543, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.05198223888874054, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.04740498960018158, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.025466736406087875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.02432239055633545, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.028439901769161224, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.026321981102228165, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.025812705978751183, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.02321026660501957, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.02219688519835472, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.014467627741396427, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.012716168537735939, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.012343554757535458, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.01225834060460329, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.007307241205126047, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.006786310113966465, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.006758571602404118, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.006382603198289871, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.006331117823719978, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.004132813308387995, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.00469113327562809, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.004009641706943512, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0037647245917469263, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.014467627741396427, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.014467627741396427, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.08293984085321426, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.07782170176506042, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.07621008157730103, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.06951723247766495, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0372273214161396, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.03558814153075218, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.04153978452086449, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.03835552558302879, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.037718433886766434, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.03392299637198448, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.032468877732753754, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.02096780762076378, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.018225619569420815, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.017736898735165596, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.01761954464018345, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.010476067662239075, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.009257645346224308, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.009212516248226166, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.008618258871138096, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.00854208879172802, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0055893477983772755, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.005792692769318819, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.005415085703134537, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.004082865547388792, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.018225619569420815, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.01761954464018345, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.0.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.06847520172595978, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.059869274497032166, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.03647558391094208, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.03373030573129654, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.029040295630693436, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.017821643501520157, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.06010555475950241, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.05372268334031105, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.03044305555522442, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.026194090023636818, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.027311701327562332, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.02805783413350582, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.02444656938314438, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.016435930505394936, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.01355612650513649, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.014938436448574066, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.01146214734762907, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.00959191657602787, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.011108794249594212, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.009105232544243336, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.011017324402928352, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01090897899121046, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.008594539947807789, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.008648562245070934, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.017821643501520157, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.014938436448574066, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.013541688211262226, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.010805565863847733, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.008316602557897568, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.007305650506168604, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.005801249761134386, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0039011044427752495, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.010671703144907951, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.008417477831244469, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.006314495578408241, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.004803172312676907, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.005110582802444696, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.005663391202688217, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.004168890416622162, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.002902722917497158, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0024892427027225494, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0029109343886375427, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0016474071890115738, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0014630178920924664, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0014794673770666122, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.001233285409398377, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0015554886776953936, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.001380440080538392, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0009522298350930214, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0009965485660359263, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.013541688211262226, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.013541688211262226, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.01315750926733017, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.010302289389073849, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.007960142567753792, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.006971793249249458, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.005490842275321484, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.00377450417727232, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.009009848348796368, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.007929941639304161, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.006036920938640833, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.004442682955414057, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.004546258598566055, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.004590973258018494, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0037956053856760263, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.002718487521633506, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0023997053503990173, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.002314890269190073, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0015447031473740935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0014111136551946402, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.001366686774417758, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00117538683116436, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.001260093180462718, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0012655140599235892, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0008824495016597211, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0009373225620947778, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.01315750926733017, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.01315750926733017, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.07581057399511337, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.06416308134794235, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.05906566604971886, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.050868865102529526, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03379065543413162, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.028895830735564232, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.042075470089912415, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.03864243999123573, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.03548673167824745, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.027454273775219917, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.025708287954330444, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02148340828716755, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01853843405842781, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.016329415142536163, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.015773629769682884, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.010790316388010979, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.008639611303806305, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.008431040681898594, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.007421689573675394, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.007052262779325247, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005706637166440487, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005848609376698732, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004934160504490137, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004087743349373341, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01853843405842781, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.015773629769682884, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.1.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.14965982735157013, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08856740593910217, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0555889867246151, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06560049951076508, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06429430097341537, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.034466471523046494, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09078734368085861, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.08094646781682968, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.07135830074548721, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.040922828018665314, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04370639845728874, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04671842232346535, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.040012434124946594, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.032579731196165085, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.030588362365961075, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.023966606706380844, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.019437827169895172, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.018833691254258156, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.015885384753346443, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014634759165346622, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013950565829873085, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015768952667713165, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.011734857223927975, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013106084428727627, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.015885384753346443, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015768952667713165, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.10806934535503387, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.10282595455646515, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.10132449865341187, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.09322971105575562, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.047951146960258484, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.04650947451591492, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.05279460549354553, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.04879111424088478, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.04838871210813522, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.04454542323946953, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0430632121860981, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.026756232604384422, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.023296548053622246, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.02293713018298149, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.02285337634384632, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.01342584379017353, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.012283320538699627, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.012259885668754578, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.011672953143715858, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.011620895005762577, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.00744049996137619, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.008086026646196842, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.007333602290600538, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0062068128027021885, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.01342584379017353, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.01342584379017353, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.13458475470542908, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.12845106422901154, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.12666848301887512, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.11688879877328873, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.059883344918489456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.05812542885541916, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.06579089164733887, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.06086316332221031, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.06037456914782524, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0557524748146534, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.053818926215171814, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.03299912437796593, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.02871137298643589, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.02827751822769642, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.028182512149214745, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.016424482688307762, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.014383390545845032, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.014353018254041672, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.013569585047662258, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.013511410914361477, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.008515717461705208, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.008418942801654339, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.008370520547032356, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.005351297557353973, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.016424482688307762, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.016424482688307762, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.1.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.12932808697223663, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.11599507182836533, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1023675948381424, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.09361161291599274, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.06376638263463974, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.04563944786787033, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.08593863248825073, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.07862617820501328, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.06565972417593002, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0584200918674469, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.05569641292095184, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.04846792295575142, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.04694255813956261, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.029163675382733345, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.02570972591638565, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.022180456668138504, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.01675451174378395, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.015247348695993423, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.01581808179616928, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.014105982147157192, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.014280757866799831, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.013865482062101364, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.012050707824528217, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.010704189538955688, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.01675451174378395, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.015247348695993423, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.02975316531956196, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.027309566736221313, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.026172231882810593, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.02343163825571537, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.013454712927341461, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.01243291050195694, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.016366727650165558, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.014588304795324802, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.013734917156398296, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.011900242418050766, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.01136521715670824, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.00837365910410881, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0070053148083388805, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.006463832221925259, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.006326686590909958, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.004234158433973789, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0033941157162189484, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0033310323487967253, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.003095122752711177, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.003007518360391259, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00226525217294693, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0022158154752105474, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0020108213648200035, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0015466975746676326, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.016366727650165558, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.016366727650165558, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.02424347586929798, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.022204246371984482, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.021274298429489136, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.01899030990898609, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.010918047279119492, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.010093005374073982, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.013021624647080898, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.011845475994050503, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.011153417639434338, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.009635359048843384, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.00913961697369814, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0065784817561507225, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0056425416842103004, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.005234305281192064, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.005133509635925293, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.003303087782114744, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0027366015128791332, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0026945939753204584, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.002485742559656501, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.002423281082883477, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.001759446575306356, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.001764354994520545, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0016055761370807886, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0012228593695908785, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.013021624647080898, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.013021624647080898, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11569193005561829, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10772518813610077, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10510262101888657, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09505168348550797, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05303702875971794, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.050275444984436035, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05951249599456787, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05497819185256958, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05391016975045204, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.047549109905958176, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04512615129351616, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030186204239726067, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02616151049733162, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025295337662100792, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025085313245654106, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015050508081912994, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012884119525551796, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012817670591175556, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011766326613724232, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01163167878985405, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007765563670545816, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007635314017534256, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0074402401223778725, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004796311259269714, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015050508081912994, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015050508081912994, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.15261070430278778, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11991943418979645, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10657234489917755, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0869038924574852, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06800927221775055, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.055188339203596115, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08694365620613098, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07863987982273102, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.07280212640762329, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05049446225166321, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.046734414994716644, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04497513547539711, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03868813440203667, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03399093076586723, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0328100323677063, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.023179305717349052, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.019622480496764183, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.019280901178717613, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01668430119752884, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015965018421411514, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013692514039576054, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014986400492489338, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01238324772566557, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012357900850474834, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01668430119752884, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014986400492489338, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15303705632686615, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.14588849246501923, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1438130885362625, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.132852703332901, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.06983128190040588, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.06764835119247437, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.07696118205785751, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07100547105073929, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07045416533946991, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06478189677000046, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06267935782670975, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.039067499339580536, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03385254740715027, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.033339206129312515, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03321421518921852, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.01955011487007141, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01739751175045967, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.017369970679283142, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.016431821510195732, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0163529384881258, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010581746697425842, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.010834384709596634, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01041488628834486, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00771748274564743, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01739751175045967, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0163529384881258, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18110142648220062, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17282269895076752, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1703815907239914, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15758152306079865, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08276021480560303, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08023956418037415, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09109874069690704, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08413347601890564, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08353514224290848, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07682259380817413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07426854968070984, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04603980481624603, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.039943814277648926, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.039339829236269, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.039197973906993866, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022951733320951462, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020061463117599487, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020022759214043617, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018878450617194176, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018786167725920677, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012004782445728779, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011816198006272316, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011806590482592583, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007595268078148365, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012004782445728779, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012004782445728779, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.2.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.17816008627414703, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.15801562368869781, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.14967148005962372, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.13448543846607208, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.08045072108507156, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0724809393286705, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.09634311497211456, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.08840741217136383, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.08338534832000732, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.06947951018810272, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.06646889448165894, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.04907882213592529, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.042378321290016174, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.03879249840974808, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.03790678083896637, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.024668093770742416, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.020550794899463654, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.020228760316967964, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.01844082586467266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.017868265509605408, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.013450195081532001, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.013660809956490993, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.012303007766604424, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.009789991192519665, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.01844082586467266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.017868265509605408, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.04284888505935669, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0397333949804306, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.03838880732655525, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.03424089401960373, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.019699091091752052, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.018483513966202736, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.023472489789128304, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.020965885370969772, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.020048215985298157, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.017366569489240646, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.016632957383990288, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.012054072692990303, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01007556077092886, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.009525344707071781, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.009392118081450462, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.006096178200095892, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.004972584545612335, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.004918619059026241, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.004467241000384092, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.004381611943244934, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0032392677385360003, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.003136930987238884, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.003063659183681011, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.002148651983588934, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.017366569489240646, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.016632957383990288, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.034684859216213226, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.03210293874144554, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.030836116522550583, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0276874341070652, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.015838345512747765, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.014752905815839767, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.01875358261168003, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.017100993543863297, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.016128167510032654, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.014083467423915863, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.013460464775562286, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.009494753554463387, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.008168146014213562, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.007612930610775948, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.007479006424546242, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.004761882591992617, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.00400095758959651, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.003941714763641357, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0036584509070962667, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0035752300173044205, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.002544340444728732, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0025945529341697693, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0023580403067171574, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0018307814607396722, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.017100993543863297, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.015838345512747765, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.15697482228279114, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14670483767986298, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14342568814754486, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1299848109483719, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.07252967357635498, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06894823163747787, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08177299797534943, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07496409118175507, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.07362861931324005, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06525100767612457, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06213075667619705, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04145282879471779, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.035722702741622925, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.034627556800842285, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03436064347624779, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02070796675980091, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.017631087452173233, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01754712499678135, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01612241007387638, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015959065407514572, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01072924304753542, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.010418645106256008, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01030335295945406, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00651959516108036, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.017631087452173233, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015959065407514572, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.3.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.14906978607177734, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12132713943719864, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10907702893018723, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08641014993190765, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06694850325584412, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.055878352373838425, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0889415293931961, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07777565717697144, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06992912292480469, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05083500221371651, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0466916598379612, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04497251287102699, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0390409491956234, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.034378502517938614, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03315551578998566, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.023983867838978767, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.021030766889452934, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02063121274113655, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018531687557697296, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.017865225672721863, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015020239166915417, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01699005253612995, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013661292381584644, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014743940904736519, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018531687557697296, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015020239166915417, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16912594437599182, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16052182018756866, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15795400738716125, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14520397782325745, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07874235510826111, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07591535896062851, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08687896281480789, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.080318883061409, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07957199215888977, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07234551757574081, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06965287029743195, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04449624568223953, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03875543549656868, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03803744167089462, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03788520768284798, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022320326417684555, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02036035992205143, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020317599177360535, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019163431599736214, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019062787294387817, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012312185019254684, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013381609693169594, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012082481756806374, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010227409191429615, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012312185019254684, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012312185019254684, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20508520305156708, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19478024542331696, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19170483946800232, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.176302969455719, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09521327912807465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09185642004013062, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10517590492963791, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09711198508739471, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09624866396188736, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08748745173215866, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08412905037403107, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.053507909178733826, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.046362340450286865, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04551992937922478, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04533183574676514, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02672344259917736, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02354275807738304, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02349170483648777, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022007672116160393, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021867576986551285, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01423121802508831, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01436273567378521, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013957942835986614, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009805151261389256, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01423121802508831, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01423121802508831, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.3.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.04415435716509819, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.04024975001811981, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.02907208725810051, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.02611718513071537, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.019677722826600075, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.013208595104515553, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.03694469481706619, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.031959161162376404, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.020210957154631615, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.017543727532029152, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.01779293641448021, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.017039472237229347, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.01480636466294527, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.00732303224503994, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.007718266453593969, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.004906180314719677, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.005036734044551849, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.004076271317899227, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.004686454311013222, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.003568775486201048, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.0040046451613307, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.004189726896584034, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.002618340775370598, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.002477647503837943, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.017543727532029152, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.017039472237229347, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.056826747953891754, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.05227498710155487, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.050398197025060654, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.045246671885252, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.02596086449921131, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.02420424297451973, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.030077366158366203, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.027679117396473885, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.026489701122045517, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.02298285812139511, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.021813658997416496, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.015229500830173492, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01320220809429884, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.012436087243258953, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.012249241583049297, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.007615883368998766, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.006437665782868862, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.006363335531204939, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.005845315754413605, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.005727232899516821, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.003991191275417805, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0040277279913425446, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.003733674995601177, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0027009558398276567, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.015229500830173492, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.015229500830173492, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0501784048974514, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.046128466725349426, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0444062203168869, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.039870936423540115, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.02290962263941765, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.021315909922122955, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.02678552456200123, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.02455240488052368, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.02338505908846855, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.020272409543395042, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.019316181540489197, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.013571668416261673, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.011699515394866467, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.010976308025419712, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01079852506518364, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.006787787191569805, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.005678057670593262, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.005606080871075392, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.00515554565936327, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0050421650521457195, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.003555326024070382, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0035528806038200855, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0033106449991464615, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.002369423396885395, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.013571668416261673, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.013571668416261673, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.16597430408000946, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15473772585391998, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15104940533638, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1367807388305664, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.07693972438573837, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07293005287647247, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08639144897460938, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07969542592763901, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.07816897332668304, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0689091607928276, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06535912305116653, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04380955919623375, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.037989262491464615, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03673817217350006, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.036442700773477554, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02185908891260624, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01869927905499935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.018607592210173607, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017043400555849075, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.016858315095305443, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.011273949407041073, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.011056716553866863, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01080765388906002, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006901492364704609, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.018607592210173607, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.016858315095305443, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.4.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18531349301338196, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16152550280094147, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15148381888866425, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12204235792160034, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08670409023761749, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07685644179582596, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1047452762722969, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09530448913574219, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08945652842521667, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06834586709737778, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06019410490989685, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05433844029903412, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.046637166291475296, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04268269240856171, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.041698917746543884, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02756175957620144, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02360808663070202, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023236963897943497, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02028542384505272, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019671577960252762, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015677230432629585, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016844185069203377, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014475636184215546, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013298211619257927, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015677230432629585, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015677230432629585, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17466938495635986, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16527710855007172, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16248950362205505, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14887423813343048, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0813552588224411, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07818978279829025, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0897856205701828, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08307628333568573, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08229659497737885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0742657259106636, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07103779166936874, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04556963965296745, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.039647821336984634, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.038867976516485214, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.038680195808410645, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0227554552257061, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019998449832201004, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01995040848851204, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018568405881524086, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01845518685877323, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011935748159885406, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012046636082231998, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011676015332341194, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008039894513785839, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018568405881524086, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01845518685877323, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21485455334186554, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20343393087387085, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.200012668967247, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18331845104694366, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10003931820392609, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0961926057934761, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11032575368881226, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10214022547006607, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10119592398405075, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09133424609899521, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0873786062002182, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05588412284851074, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04864185303449631, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04769522324204445, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04746979847550392, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.027873355895280838, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024229256436228752, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024164089933037758, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022435735911130905, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02229444868862629, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01437978632748127, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014107011258602142, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014062287285923958, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008757935836911201, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01437978632748127, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01437978632748127, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.4.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22271332144737244, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19583305716514587, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18540894985198975, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16309453547000885, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10209158062934875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09138406813144684, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12088833749294281, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11127932369709015, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10582570731639862, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08587164431810379, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08074735105037689, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06177295371890068, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05342428386211395, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04923934116959572, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04822413623332977, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03101683035492897, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02592533454298973, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025573646649718285, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.022765425965189934, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022078117355704308, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016783125698566437, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01696041412651539, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015470117330551147, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011943655088543892, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016783125698566437, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015470117330551147, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.06667207926511765, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.06128496676683426, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.058974739164114, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.052895430475473404, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03053504228591919, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.028386902064085007, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.03545472025871277, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.03274819627404213, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.031162481755018234, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.026978299021720886, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.02553033083677292, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.017958197742700577, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.015648696571588516, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.014658216387033463, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01442167442291975, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.008995063602924347, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.00765212532132864, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.007554356008768082, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.006948173511773348, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.006800617091357708, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004747345577925444, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.004880389664322138, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0044205524027347565, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0033712927252054214, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.017958197742700577, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.015648696571588516, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.05775617063045502, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.05307478457689285, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.05095615237951279, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.04572635143995285, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.02648065984249115, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.024557538330554962, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.031153904274106026, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.02857050485908985, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.02702346071600914, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.023399539291858673, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.02229820378124714, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.01579653099179268, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.013642052188515663, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01271168701350689, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.012486045248806477, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.007905354723334312, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.006608127150684595, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0065113333985209465, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.005999090149998665, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.005855808034539223, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004161305725574493, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.004194539040327072, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0038498775102198124, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.002845712471753359, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.01579653099179268, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.01579653099179268, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.17808043956756592, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16581353545188904, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.16175593435764313, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14613549411296844, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08275608718395233, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07831728458404541, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09316200017929077, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.08583182096481323, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0840904712677002, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07389259338378906, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06994634121656418, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04728752374649048, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.040933750569820404, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03952394798398018, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.039190471172332764, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02359832637012005, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.020141322165727615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.020030569285154343, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018323613330721855, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01810588873922825, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012189097702503204, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.011936982162296772, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.011664211750030518, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.007483417168259621, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018323613330721855, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01810588873922825, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.5.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19116175174713135, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16359826922416687, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1535291224718094, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12696319818496704, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08869222551584244, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07761046290397644, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10598885267972946, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09646109491586685, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0922069177031517, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07045230269432068, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0629836693406105, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.054829664528369904, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04715925082564354, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04365646466612816, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04279256612062454, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.027900591492652893, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024172332137823105, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023910148069262505, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020870041102170944, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020332561805844307, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01590087078511715, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017181040719151497, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014816774055361748, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01364530436694622, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01590087078511715, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01590087078511715, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1671244502067566, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15777146816253662, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15493427217006683, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14157938957214355, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07808523625135422, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07486017048358917, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08646723628044128, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.079932302236557, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07905635237693787, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07098131626844406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06776916235685349, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04399684816598892, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03826134651899338, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.037431471049785614, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03723132610321045, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02199653908610344, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019445793703198433, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01939534954726696, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018035339191555977, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017915209755301476, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011698228307068348, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012008615769445896, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011429908685386181, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00834699533879757, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018035339191555977, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017915209755301476, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22321827709674835, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21074657142162323, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2070917785167694, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18926066160202026, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1042524054646492, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09996341168880463, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11528406292200089, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1066354513168335, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10555627942085266, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09479210525751114, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09045153111219406, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05850080028176308, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05084049701690674, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04975375533103943, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04950908198952675, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029212387278676033, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02530045621097088, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025235144421458244, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023348640650510788, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023184364661574364, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015158215537667274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014788087457418442, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014799989759922028, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009232288226485252, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015158215537667274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015158215537667274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.5.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22732806205749512, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19989262521266937, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1888357549905777, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1661178171634674, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10438068211078644, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09317834675312042, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1244436725974083, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11443144083023071, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10820991545915604, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08788923174142838, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08261129260063171, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06373855471611023, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.055049240589141846, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.050451118499040604, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.049329206347465515, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.032087795436382294, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026702100411057472, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02630019746720791, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023514093831181526, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02276274934411049, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017551099881529808, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017697487026453018, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01612810231745243, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012645403854548931, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017551099881529808, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01612810231745243, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08360306173563004, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07764257490634918, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07529295235872269, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06788954883813858, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03867067024111748, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03635382652282715, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04417671635746956, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04082999750971794, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.03934229165315628, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03449476510286331, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.032709743827581406, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.022442730143666267, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01953008957207203, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.018577123060822487, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.018347453325986862, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011229118332266808, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009679148904979229, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009591071866452694, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008849995210766792, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008708999492228031, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005929534323513508, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006099987775087357, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005611321423202753, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004220101982355118, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.018577123060822487, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.018347453325986862, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0689196065068245, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.06406351923942566, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.061980146914720535, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.05599367246031761, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03192964196205139, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.029961807653307915, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.03696267679333687, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.033879950642585754, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0324724055826664, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.02852475270628929, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.027205387130379677, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.018764052540063858, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01619066670536995, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.015318688005208969, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01510932482779026, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.009392794221639633, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.007928500883281231, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.007841422222554684, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.007245943415910006, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.007113636005669832, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004922924563288689, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.004940044600516558, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00462933536618948, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0033143237233161926, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01619066670536995, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01510932482779026, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19773827493190765, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18484044075012207, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18063190579414368, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16368450224399567, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09222070127725601, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08758760988712311, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10340891778469086, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09532421082258224, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09364120662212372, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08268129825592041, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07840710133314133, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05246880277991295, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04546284303069115, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.044053856283426285, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04371850937604904, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.026172641664743423, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022388450801372528, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022279715165495872, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02042497880756855, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020203247666358948, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013461115770041943, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013163424097001553, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01292674895375967, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00815447885543108, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013461115770041943, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013461115770041943, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1904948353767395, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16070523858070374, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14877760410308838, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1262647658586502, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08832332491874695, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07601501047611237, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10750198364257812, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09765665233135223, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09139249473810196, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06832964718341827, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06408897042274475, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.055517327040433884, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04761505872011185, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04334555193781853, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04230564832687378, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028017649427056313, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023819850757718086, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023428302258253098, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020171618089079857, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019495733082294464, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015788482502102852, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016872666776180267, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014471053145825863, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013148636557161808, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015788482502102852, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015788482502102852, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1647528111934662, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15523092448711395, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15230529010295868, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1389118880033493, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07712223380804062, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07379182428121567, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08545219153165817, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07904374599456787, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07813726365566254, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06986329704523087, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06657426059246063, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.043435387313365936, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03778684139251709, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.036915458738803864, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03671121224761009, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021689657121896744, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01897972635924816, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.018928060308098793, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017514269798994064, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017381398007273674, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0113662825897336, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011441976763308048, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011074868030846119, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007604303769767284, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017514269798994064, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017381398007273674, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22774681448936462, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2147369235754013, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21075539290905, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1923229694366455, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10662141442298889, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10204368829727173, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11805157363414764, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10919545590877533, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10801048576831818, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09664295613765717, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09216535836458206, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05997183546423912, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0521027110517025, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.050928253680467606, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0506509393453598, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029942255467176437, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025921501219272614, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025845348834991455, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0238654688000679, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023684484884142876, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015555785968899727, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015205700881779194, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015155346132814884, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009537940844893456, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015555785968899727, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015155346132814884, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.6.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23454196751117706, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2069493532180786, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19593147933483124, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1726338118314743, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10789282619953156, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09673596173524857, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1284482330083847, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1180075854063034, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11177626252174377, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09118793904781342, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0857694000005722, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0656733512878418, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05679060146212578, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05221277102828026, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.0510910302400589, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03304770216345787, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027725286781787872, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027333861216902733, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024510303512215614, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023772820830345154, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018048882484436035, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018461784347891808, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016626568511128426, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013345833867788315, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018048882484436035, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016626568511128426, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08202618360519409, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07625188678503036, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07392977178096771, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06659185886383057, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03799155354499817, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03571322187781334, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.043490730226039886, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04011744260787964, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.038632795214653015, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03389938548207283, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03212488070130348, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.022077040746808052, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.019167136400938034, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.018212784081697464, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.017985960468649864, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011042400263249874, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009411834180355072, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009320523589849472, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008587821386754513, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00844422634691, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005784751381725073, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005831094924360514, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005459367297589779, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003895900212228298, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.018212784081697464, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.017985960468649864, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.06900589168071747, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.06422094255685806, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.06209398806095123, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.056007903069257736, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.032000210136175156, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.030003970488905907, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.036985401064157486, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.034075457602739334, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.032538242638111115, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.028583118692040443, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.02716347761452198, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.018772128969430923, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01628025248646736, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.015343797393143177, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.015118001028895378, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.009382271207869053, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.007921408861875534, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.007825261913239956, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.007231852971017361, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.007090042345225811, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0049069719389081, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.004916100762784481, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004596753045916557, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0032559926621615887, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01628025248646736, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.015118001028895378, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19592076539993286, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18315696716308594, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17896543443202972, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16185328364372253, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09147419035434723, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08681027591228485, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10298211872577667, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09460481256246567, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09287992119789124, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08195780217647552, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07769379019737244, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0523122176527977, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.045134708285331726, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04371214285492897, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04336913302540779, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02609626017510891, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022228943184018135, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02211776003241539, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020256999880075455, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020041698589920998, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013423086144030094, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013086814433336258, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01284495834261179, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00811856146901846, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013423086144030094, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013423086144030094, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21580109000205994, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17866484820842743, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1657390147447586, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13708873093128204, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09846983104944229, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08423224836587906, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11857891082763672, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1080562099814415, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1034415140748024, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07520182430744171, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06939030438661575, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.061046041548252106, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05286649614572525, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04847646877169609, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0474122017621994, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031008239835500717, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026976536959409714, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026692712679505348, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02284128963947296, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02216743677854538, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017699582502245903, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01938512735068798, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016412939876317978, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015489398501813412, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017699582502245903, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015489398501813412, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15824972093105316, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.14900943636894226, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14611166715621948, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13320502638816833, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07421731948852539, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07095275074243546, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08247625827789307, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07620485126972198, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07522262632846832, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06715600937604904, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06401583552360535, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.041933462023735046, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03648287430405617, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.035593632608652115, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03538042679429054, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021008970215916634, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01842319779098034, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01836346834897995, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01700083166360855, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.016868913546204567, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01113804429769516, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011288914829492569, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010844588279724121, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007708560209721327, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01842319779098034, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.016868913546204567, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2278088927268982, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21454690396785736, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21058915555477142, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19197934865951538, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10700353980064392, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10235657542943954, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11871198564767838, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10969961434602737, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10840395092964172, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09685305505990982, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09242715686559677, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06051097437739372, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05250339210033417, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05127914994955063, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05099035054445267, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03022899106144905, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026491032913327217, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02641267329454422, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024446075782179832, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02426592819392681, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015968134626746178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01614791341125965, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015565615147352219, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010937601327896118, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015968134626746178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015565615147352219, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.7.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22776181995868683, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20039905607700348, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18953470885753632, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1667463332414627, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10461614280939102, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09363412111997604, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12623077630996704, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11461343616247177, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10853280872106552, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08828477561473846, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0831264927983284, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06436759978532791, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.055401790887117386, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.050902243703603745, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.049798280000686646, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.0328892357647419, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027486838400363922, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027101848274469376, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02441801317036152, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023687321692705154, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018733151257038116, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018853873014450073, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01735115610063076, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014194101095199585, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01735115610063076, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01735115610063076, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09468776732683182, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08812413364648819, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08550673723220825, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07710554450750351, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04388490319252014, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.041310857981443405, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05013253912329674, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.046234481036663055, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04458804801106453, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.039170533418655396, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03715364634990692, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025428777560591698, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02208501100540161, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021027684211730957, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020776625722646713, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012723354622721672, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010870840400457382, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010771039873361588, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009926477447152138, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009767389856278896, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006677292753010988, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00673196604475379, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006320279091596603, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004513164982199669, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012723354622721672, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012723354622721672, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.07713726907968521, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07183247804641724, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.06951393187046051, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06273128092288971, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03579111397266388, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03360414505004883, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04124206677079201, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.038014501333236694, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.03638306260108948, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03197399154305458, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.030410373583436012, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.020915336906909943, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.018166186287999153, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01716361567378044, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01692008599638939, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.010465326718986034, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.008867893368005753, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.008766110986471176, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008103608153760433, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.007949288934469223, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005485017318278551, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005507432855665684, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005161755718290806, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003661914262920618, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.018166186287999153, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01692008599638939, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20945236086845398, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19587387144565582, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19137664139270782, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1730785369873047, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09780902415513992, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09285908192396164, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11023716628551483, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10117632150650024, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09929834306240082, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08761821687221527, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0832299217581749, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.056020017713308334, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04826553910970688, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.046735260635614395, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.046369362622499466, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.027959857136011124, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023770026862621307, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023648083209991455, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02166472189128399, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021425068378448486, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014410077594220638, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013995589688420296, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01377695333212614, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.008687817491590977, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014410077594220638, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014410077594220638, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.8.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2000654935836792, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17489536106586456, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1662743091583252, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14150844514369965, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09326432645320892, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08331486582756042, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10818584263324738, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0993649810552597, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09621114283800125, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07630602270364761, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06870971620082855, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05612267926335335, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04850203916430473, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04573160037398338, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04508771747350693, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02846219390630722, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02510315552353859, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024903802201151848, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021988529711961746, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021566255018115044, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0162152461707592, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01748250238597393, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0154041713103652, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013774607330560684, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0162152461707592, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0154041713103652, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16382712125778198, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15424038469791412, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15114445984363556, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13766813278198242, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07709042727947235, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07363703846931458, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08566994965076447, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07916679978370667, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07811802625656128, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06968791782855988, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0663941502571106, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04376664757728577, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.038007091730833054, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.037061907351017, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03683207929134369, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02190246991813183, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019333235919475555, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.019262410700321198, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017859850078821182, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017725154757499695, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01171056181192398, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012069021351635456, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01140508707612753, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008505495265126228, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017859850078821182, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017725154757499695, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22965437173843384, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21627378463745117, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2121545970439911, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19332565367221832, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10778404027223587, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10305742919445038, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11985653638839722, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11060135811567307, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10920396447181702, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0974879339337349, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09291735291481018, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06092378497123718, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05282338336110115, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05155535414814949, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.051257047802209854, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03047463670372963, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026328464969992638, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026245098561048508, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024211086332798004, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024018052965402603, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01600208878517151, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015580788254737854, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015576106496155262, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00992294680327177, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01600208878517151, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015576106496155262, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.8.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23600709438323975, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20761488378047943, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19616009294986725, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17263472080230713, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10879340767860413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09711071103811264, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1300598531961441, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11923207342624664, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1127113550901413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09153631329536438, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08610285073518753, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06656850129365921, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.057628098875284195, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.052881281822919846, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.0517130084335804, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03370874747633934, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028462953865528107, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028047669678926468, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02520880289375782, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024444714188575745, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018797973170876503, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01942882128059864, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017342254519462585, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014521876350045204, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017342254519462585, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017342254519462585, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10120122134685516, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09428530931472778, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09150148183107376, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08261196315288544, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04707722365856171, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.044357042759656906, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05377550795674324, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0495995357632637, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.047855548560619354, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04209975525736809, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03997667878866196, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027373887598514557, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02382882870733738, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02268810011446476, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022423088550567627, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013731925748288631, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011948391795158386, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011839128099381924, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01097396295517683, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010809464380145073, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007334600668400526, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00769693311303854, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006953911855816841, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005522607825696468, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013731925748288631, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013731925748288631, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08639858663082123, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08054393529891968, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.078011155128479, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0705113410949707, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.040213849395513535, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03782603144645691, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04620955139398575, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04257906228303909, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.040850117802619934, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.035969872027635574, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03422560915350914, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.023478258401155472, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.020344523712992668, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.019293954595923424, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.019033323973417282, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011743245646357536, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009982078336179256, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.00987539254128933, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009135759435594082, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008975986391305923, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006170488893985748, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006212737876921892, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005817586090415716, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004175707697868347, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011743245646357536, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011743245646357536, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2089414894580841, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19540667533874512, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19100773334503174, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17284516990184784, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09768250584602356, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09270373731851578, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11017811298370361, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10107557475566864, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09910041838884354, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08755180984735489, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08313264697790146, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05600592866539955, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04823809117078781, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.046687472611665726, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04631023108959198, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.027954448014497757, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023792598396539688, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023670218884944916, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021707596257328987, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021474648267030716, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014457782730460167, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014093417674303055, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013832381926476955, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.008850762620568275, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014457782730460167, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014457782730460167, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21219860017299652, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1814066469669342, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.16940852999687195, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13685230910778046, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09821909666061401, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08568473160266876, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11948256194591522, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10877219587564468, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10172130912542343, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0775359570980072, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06852535903453827, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0618722066283226, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05338366702198982, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04859067499637604, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.047413527965545654, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031516071408987045, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02738196775317192, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026944661512970924, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023761345073580742, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023033615201711655, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018298771232366562, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.02007070556282997, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01687716878950596, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.016257980838418007, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018298771232366562, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.016257980838418007, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1658029407262802, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15605323016643524, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15295696258544922, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1394115537405014, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07800480723381042, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07452262938022614, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08667872846126556, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08015945553779602, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07905010133981705, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07050897926092148, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06711427122354507, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.044244345277547836, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.038455307483673096, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0374724343419075, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.037246666848659515, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022118357941508293, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019508028402924538, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.019440708681941032, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01800844445824623, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017867954447865486, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01175296027213335, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012118076905608177, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011432060040533543, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0084684444591403, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01800844445824623, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017867954447865486, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2307203859090805, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2173214703798294, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21314425766468048, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19420187175273895, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10834237933158875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10359051823616028, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12036366760730743, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11119379103183746, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10979572683572769, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09798194468021393, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0932052955031395, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.061145756393671036, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.053095899522304535, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.051806483417749405, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05149848759174347, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03052549436688423, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026396896690130234, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02630480006337166, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0242453645914793, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024049898609519005, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01585676148533821, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015528335236012936, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015414833091199398, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009773955680429935, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01585676148533821, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015414833091199398, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.9.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24314218759536743, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21473973989486694, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20339757204055786, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1789628267288208, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11243179440498352, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10073249787092209, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13368044793605804, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12278655171394348, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11645909398794174, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09488162398338318, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08922944962978363, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.068697489798069, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.059517040848731995, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05478115379810333, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.053643565624952316, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.034752778708934784, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029712431132793427, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029298745095729828, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02641913667321205, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.0256746094673872, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01944863423705101, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020486541092395782, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017983656376600266, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015592030249536037, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017983656376600266, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015592030249536037, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10738332569599152, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10005556046962738, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09720451384782791, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.087666355073452, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04989749565720558, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.047040849924087524, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.056765660643577576, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05242229625582695, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05067986249923706, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04457715153694153, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04222079738974571, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028851915150880814, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02510065585374832, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02395503781735897, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023674217984080315, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01444530300796032, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012437643483281136, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012331224977970123, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011382721364498138, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011208775453269482, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007611832581460476, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007774862926453352, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007229379378259182, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005317282862961292, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01444530300796032, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01444530300796032, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08769377321004868, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08172715455293655, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07912178337574005, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07143411040306091, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.040726952254772186, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03828706219792366, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04680256545543671, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04318686202168465, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04138448089361191, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.036411698907613754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03459538146853447, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.023731425404548645, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02063835971057415, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01953265629708767, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.019265923649072647, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011877480894327164, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010099097155034542, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009985639713704586, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009234298020601273, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009065087884664536, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0062354616820812225, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00627797981724143, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005877522751688957, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004196408204734325, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011877480894327164, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011877480894327164, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21041473746299744, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19666707515716553, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19187067449092865, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17344418168067932, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09825370460748672, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09313846379518509, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11076341569423676, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10207803547382355, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09973211586475372, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08807218819856644, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08336705714464188, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.056241557002067566, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04873558506369591, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04698202759027481, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.046568308025598526, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028074007481336594, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023970915004611015, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02381858229637146, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02186933346092701, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021603848785161972, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014518248848617077, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014269457198679447, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013879308477044106, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009006472304463387, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014518248848617077, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014518248848617077, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22481779754161835, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2041700780391693, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1949196755886078, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.166335791349411, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10612131655216217, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09699076414108276, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12534019351005554, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1148461326956749, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10842447727918625, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08981619030237198, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08142706751823425, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06486805528402328, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05623139813542366, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05218511447310448, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05120443180203438, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032818492501974106, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028786225244402885, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02838224731385708, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02578059583902359, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025187388062477112, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01854082942008972, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.020334942266345024, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01733466610312462, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.016071666032075882, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01854082942008972, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.016071666032075882, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16417528688907623, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15434153378009796, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1511794626712799, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13749468326568604, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07730977982282639, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0737563893198967, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08598002791404724, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07951198518276215, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07834918051958084, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06974728405475616, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06635762006044388, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.043983396142721176, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0382159985601902, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.037218768149614334, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03698762506246567, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02201422117650509, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01952284574508667, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01944650337100029, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018030840903520584, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01788460835814476, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011821331456303596, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01232946664094925, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011500151827931404, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008839909918606281, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018030840903520584, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01788460835814476, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2300620675086975, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2163468897342682, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21210671961307526, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19295734167099, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10791942477226257, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10306855291128159, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12005835771560669, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11084109544754028, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10942739993333817, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09739822894334793, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09261798858642578, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.060901302844285965, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05293243005871773, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0516246072947979, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05131221190094948, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030474206432700157, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02633577585220337, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026243843138217926, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02415907196700573, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023958759382367134, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015908407047390938, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01554866787046194, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015467756427824497, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00985358189791441, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015908407047390938, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015467756427824497, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.10.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2503129839897156, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22202488780021667, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2111203819513321, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1858110874891281, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11591873317956924, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10451756417751312, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13625001907348633, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12547649443149567, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11972774565219879, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0980282872915268, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0919037014245987, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0700094923377037, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06059093028306961, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.056248269975185394, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.055179037153720856, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.035372618585824966, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030083002522587776, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02971995808184147, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026669932529330254, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02597305178642273, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01969229057431221, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.02020234428346157, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01838698238134384, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014959253370761871, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01838698238134384, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014959253370761871, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10714389383792877, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09985686838626862, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09694831073284149, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08745644986629486, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04983420670032501, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04697250574827194, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.056789930909872055, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0524471290409565, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.050642985850572586, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04449585825204849, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04217509925365448, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028867006301879883, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025079850107431412, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023893628269433975, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02361394464969635, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014437287114560604, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012360543943941593, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012252623215317726, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011295105330646038, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011118365451693535, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007579611614346504, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007657697424292564, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007177405990660191, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005141455680131912, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014437287114560604, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014437287114560604, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08890184760093689, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08291712403297424, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08030731976032257, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0724685937166214, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04139397293329239, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03889524191617966, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.047483231872320175, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.043912533670663834, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04203552380204201, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03700757399201393, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03509969636797905, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.024105381220579147, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.020977022126317024, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01984277367591858, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.019568229094147682, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012054595164954662, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010232559405267239, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010110771283507347, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009349999949336052, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009175301529467106, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0063086459413170815, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006326469127088785, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005941412877291441, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004180337302386761, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012054595164954662, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012054595164954662, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21219666302204132, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19821691513061523, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19333213567733765, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1746021956205368, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09911688417196274, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09391183406114578, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11203629523515701, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1030750647187233, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10063597559928894, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08869926631450653, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08392293751239777, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05687761679291725, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.049213510006666183, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04742129147052765, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04698316380381584, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028396151959896088, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024212578311562538, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02405422180891037, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02206801436841488, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02178802900016308, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014695120975375175, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01444078516215086, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01402940321713686, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009137704968452454, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014695120975375175, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014695120975375175, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2228766232728958, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19833308458328247, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18757522106170654, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15950310230255127, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10343421995639801, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09354287385940552, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1249702051281929, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11467994004487991, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10761047899723053, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08710236102342606, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07994701713323593, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06448958814144135, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05586063489317894, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05070158466696739, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.049429312348365784, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03250046446919441, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027777373790740967, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02732842229306698, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024753035977482796, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023974116891622543, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018012577667832375, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019606584683060646, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016420463100075722, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015170329250395298, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018012577667832375, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015170329250395298, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1629355251789093, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1530054360628128, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14979609847068787, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1360938400030136, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07675683498382568, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07317757606506348, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0855197161436081, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07906455546617508, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07780537009239197, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0691559687256813, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06575483083724976, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.043735794723033905, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03803100809454918, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03699737414717674, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.036746397614479065, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021894438192248344, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01944027468562126, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01937040500342846, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01794774830341339, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017798732966184616, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011764980852603912, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012350869365036488, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011421360075473785, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008922002278268337, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01794774830341339, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017798732966184616, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22663173079490662, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21287809312343597, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20862193405628204, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18942874670028687, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10634077340364456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10146541148424149, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1183401495218277, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10933195054531097, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10782737284898758, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09580745548009872, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09089596569538116, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.060088034719228745, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05221260339021683, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.050866447389125824, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05054491013288498, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029998065903782845, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025936197489500046, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025845486670732498, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023760192096233368, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023560117930173874, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015540743246674538, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015312476083636284, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015069989487528801, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009678096510469913, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015540743246674538, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015069989487528801, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.11.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25196242332458496, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22452402114868164, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21397100389003754, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18780960142612457, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11683332175016403, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10590319335460663, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13733936846256256, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12624411284923553, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12062995135784149, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09918493032455444, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09287077188491821, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07060255110263824, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06099319830536842, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.056629739701747894, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.055576078593730927, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03571995720267296, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030196139588952065, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029835397377610207, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02680138498544693, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02611556276679039, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01989756152033806, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.02015084959566593, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018564501777291298, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014807536266744137, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018564501777291298, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018564501777291298, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11326387524604797, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1055728867650032, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10255714505910873, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09238289296627045, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05269502103328705, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04971141368150711, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05994780734181404, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05535072088241577, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0535486564040184, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04704492539167404, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04452984407544136, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03042987920343876, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02645030990242958, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025264184921979904, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024973297491669655, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015224909409880638, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013004103675484657, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012889191508293152, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011857384815812111, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011678263545036316, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007945306599140167, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007963201962411404, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007543583400547504, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005240398924797773, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015224909409880638, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015224909409880638, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09356202930212021, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08727557212114334, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08454444259405136, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07629372179508209, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04358448088169098, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04097391664981842, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04995097219944, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04618597775697708, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04425270855426788, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03893916681408882, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.036954205483198166, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025322889909148216, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022060414776206017, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020885786041617393, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020599013194441795, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01266393531113863, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010756133124232292, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010637685656547546, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009820847772061825, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009640108793973923, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006617601495236158, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006624419242143631, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0062447478994727135, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004344023298472166, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01266393531113863, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01266393531113863, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21744604408740997, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20311543345451355, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19797423481941223, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17862020432949066, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10173299908638, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0962415337562561, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11500716954469681, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1058843806385994, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10326886177062988, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09091348201036453, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08600644022226334, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.058418210595846176, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05062899738550186, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04867829009890556, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04821314290165901, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029213247820734978, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024928197264671326, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024755755439400673, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0227100420743227, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022426243871450424, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015205043368041515, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014990461058914661, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014513841830193996, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009632359258830547, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015205043368041515, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014990461058914661, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.12.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22993189096450806, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2060542106628418, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19782008230686188, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17140579223632812, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10762178897857666, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09864629805088043, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12501376867294312, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11432216316461563, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1105935275554657, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09100837260484695, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08337799459695816, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06434251368045807, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05553833395242691, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0524483360350132, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.051723696291446686, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03238718956708908, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028356559574604034, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028129613026976585, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02524927444756031, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024786237627267838, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017930442467331886, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019270779564976692, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01697351038455963, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014780642464756966, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017930442467331886, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01697351038455963, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16734261810779572, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15713559091091156, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1538132280111313, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1397356539964676, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07886216044425964, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07516968250274658, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08792132139205933, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08124668896198273, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07999400794506073, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07103496044874191, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06752516329288483, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04497501254081726, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03912073373794556, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.038055211305618286, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03779926896095276, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022522658109664917, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020041150972247124, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01996694505214691, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018507665023207664, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018351977691054344, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012122263200581074, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01279563270509243, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011781079694628716, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009302445687353611, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018507665023207664, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018351977691054344, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22870969772338867, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2147298902273178, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21037761867046356, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1910347193479538, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1074351966381073, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10245029628276825, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1195356547832489, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11042521893978119, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1089276671409607, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09670305252075195, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09179753065109253, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06082243472337723, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.052745986729860306, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.051381081342697144, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05105527117848396, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030361494049429893, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026173776015639305, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026078887283802032, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023964200168848038, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023756759241223335, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0157603919506073, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015417022630572319, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015285194851458073, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009697881527245045, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0157603919506073, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015285194851458073, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.12.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.254142701625824, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22623507678508759, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21558360755443573, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18968108296394348, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11798965930938721, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10683263838291168, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1387496292591095, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12734244763851166, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12181256711483002, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10014211386442184, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09393133968114853, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07145073264837265, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.061595600098371506, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.057291429489851, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.056231655180454254, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03621708229184151, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030667949467897415, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.03031308576464653, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.0272586178034544, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026568703353405, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.020306942984461784, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020607363432645798, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018990522250533104, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015301507897675037, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015301507897675037, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015301507897675037, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12173359841108322, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11346404254436493, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11027400195598602, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09943791478872299, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05671060457825661, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0535196028649807, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06434914469718933, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.059439048171043396, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05761224031448364, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05066625401377678, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.047960780560970306, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03271356597542763, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02846900187432766, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02722833678126335, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026933150365948677, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016372134909033775, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014142927713692188, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01402435451745987, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012927446514368057, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012745575979351997, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008614319376647472, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00881626084446907, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00819897186011076, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006027986295521259, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016372134909033775, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016372134909033775, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.096815325319767, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09027543663978577, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08732547610998154, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07877668738365173, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04505395516753197, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.042302921414375305, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05183516442775726, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04790688306093216, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04575321078300476, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.040283203125, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.038235295563936234, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026304515078663826, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022898748517036438, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021611249074339867, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021300673484802246, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013160787522792816, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011144574731588364, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01100595947355032, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010177320800721645, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009979293681681156, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00688326358795166, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006898957770317793, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006475470028817654, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00454261340200901, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013160787522792816, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013160787522792816, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22687581181526184, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21193955838680267, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20672084391117096, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18680208921432495, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1061839684844017, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10058580338954926, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11985990405082703, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11030091345310211, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10783402621746063, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09498794376850128, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08991055935621262, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06093442067503929, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05277351289987564, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.050846751779317856, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05038346350193024, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030456632375717163, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02603042684495449, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025858720764517784, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023724818602204323, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023433664813637733, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01583406701683998, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01562647521495819, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015133137814700603, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010027876123785973, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01583406701683998, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015133137814700603, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.13.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22974294424057007, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2020837962627411, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19313585758209229, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16507770121097565, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10776688158512115, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09752635657787323, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12499435245990753, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11429446190595627, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11110583692789078, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0891333520412445, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08108000457286835, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06489647924900055, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055827174335718155, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05285823345184326, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05214643478393555, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03276723623275757, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028818761929869652, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028624599799513817, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025350313633680344, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02490297518670559, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018319886177778244, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01984519138932228, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017423231154680252, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01549768727272749, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018319886177778244, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01549768727272749, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1624901294708252, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15251459181308746, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14927196502685547, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1355520635843277, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07665848731994629, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07304263859987259, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08538617193698883, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07894252240657806, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07775243371725082, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06898202002048492, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06555638462305069, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04368630051612854, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03800995647907257, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03696821257472038, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03672909364104271, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021888745948672295, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01944277063012123, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01937122829258442, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01794121228158474, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01779194176197052, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011790858581662178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012371717020869255, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01145838387310505, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008954166434705257, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01794121228158474, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01779194176197052, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22661183774471283, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21278181672096252, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20845893025398254, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1893521249294281, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1064508929848671, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10154594480991364, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11844160407781601, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10939747095108032, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1079578846693039, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09583396464586258, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09101361781358719, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06023446470499039, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05225787311792374, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0509234294295311, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05060253292322159, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030087940394878387, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02595834992825985, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025863122195005417, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023757804185152054, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023552490398287773, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015647483989596367, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015296177938580513, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015188408084213734, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009637721814215183, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015647483989596367, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015188408084213734, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.13.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25107747316360474, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22373060882091522, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21325619518756866, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18780088424682617, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11629263311624527, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10533683747053146, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1371704339981079, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12575161457061768, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12011292576789856, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0989266112446785, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09285186976194382, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07031651586294174, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.060632333159446716, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05633644759654999, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.055285096168518066, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03534797951579094, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02998177334666252, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02962176501750946, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026633361354470253, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025940433144569397, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019360223785042763, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.0199318565428257, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01798618957400322, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014559296891093254, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01798618957400322, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01798618957400322, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12254822254180908, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.114415243268013, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1113247275352478, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10050924122333527, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05717013031244278, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05401790142059326, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06478665769100189, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05981654301285744, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05804915353655815, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.051134493201971054, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04846512898802757, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0329207181930542, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02859952673316002, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027387503534555435, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02710079587996006, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016453109681606293, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014113270677626133, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01400031615048647, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01289580762386322, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012715077959001064, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008589234203100204, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008634140715003014, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008173972368240356, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005707667209208012, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016453109681606293, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016453109681606293, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09645386785268784, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09007109701633453, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08723524957895279, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07886086404323578, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04496915638446808, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04230441153049469, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05154592916369438, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04767860844731331, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04565294831991196, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04027572646737099, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03822218254208565, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026134047657251358, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022762756794691086, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021539919078350067, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021242113783955574, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013058988377451897, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011074020527303219, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01094643771648407, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010118735954165459, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009936043992638588, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006806665565818548, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006789575796574354, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006416970863938332, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004413955844938755, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013058988377451897, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013058988377451897, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22981208562850952, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2150527983903885, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2096593677997589, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18971765041351318, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10783727467060089, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10212334990501404, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1220056489109993, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11225531995296478, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10943032056093216, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09668640047311783, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09156714379787445, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.062005359679460526, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05375007167458534, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.051636192947626114, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05112646520137787, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031004438176751137, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026378270238637924, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026181330904364586, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024073295295238495, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023754235357046127, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016114311292767525, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015783630311489105, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015367545187473297, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010006156750023365, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016114311292767525, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015367545187473297, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.14.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23516161739826202, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21241450309753418, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20533137023448944, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17739936709403992, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10997109115123749, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10151363909244537, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12581250071525574, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11524113267660141, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11256058514118195, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09329729527235031, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08553416281938553, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06535696238279343, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.056469496339559555, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05415768921375275, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.053605832159519196, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03344062715768814, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.029914019629359245, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.029764972627162933, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.026854030787944794, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02649601176381111, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.019545719027519226, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.020910507068037987, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.018870238214731216, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01679145358502865, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01679145358502865, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01679145358502865, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.170125812292099, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15972860157489777, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15637020766735077, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14203642308712006, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08023334294557571, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0764550045132637, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08930432051420212, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08263444155454636, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08135363459587097, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.072235107421875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06860611587762833, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04565034061670303, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03972043842077255, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03863487020134926, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.038383327424526215, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02283926121890545, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020235693082213402, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020161710679531097, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018656380474567413, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01849769987165928, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012210212647914886, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012760783545672894, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011857717297971249, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009110983461141586, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01849769987165928, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01849769987165928, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22895479202270508, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21502457559108734, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21070356667041779, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1913987398147583, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10763969272375107, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10264432430267334, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11959420889616013, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11061635613441467, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1091289222240448, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09691434353590012, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09193208813667297, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06080697104334831, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05282833054661751, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05146753042936325, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.051145412027835846, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030331749469041824, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0262137558311224, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026115823537111282, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024000082165002823, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023793285712599754, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015692507848143578, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015426209196448326, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015225590206682682, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009675645269453526, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015692507848143578, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015225590206682682, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.14.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2544389069080353, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2254633903503418, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21406756341457367, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18816858530044556, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11779001355171204, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10591500252485275, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1391463428735733, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1279400736093521, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12181761115789413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09956090152263641, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09319815784692764, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07126319408416748, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.061628103256225586, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05700185149908066, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05585732311010361, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03596873953938484, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03018411062657833, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029797077178955078, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02664867416024208, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025898152962327003, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.0197649784386158, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019945569336414337, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018319949507713318, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014340821653604507, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018319949507713318, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018319949507713318, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11885828524827957, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11097140610218048, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10798613727092743, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09751908481121063, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05547480285167694, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.052404604852199554, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06286078691482544, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05806116759777069, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.056301508098840714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04965709522366524, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0470799021422863, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.031972162425518036, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02775999717414379, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026574311777949333, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026287216693162918, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015983054414391518, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013663331978023052, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013551304116845131, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012483739294111729, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012306427583098412, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008344844914972782, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008324268274009228, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007944991812109947, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005455158185213804, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015983054414391518, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015983054414391518, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09645450115203857, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09008175879716873, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08731687068939209, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07888524234294891, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04496832191944122, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04232916980981827, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05143403261899948, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04763506352901459, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.045666661113500595, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04028872400522232, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03820414841175079, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026094619184732437, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022756628692150116, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021547731012105942, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021258849650621414, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013038409873843193, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011093508452177048, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01096918061375618, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01014312356710434, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00995780061930418, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006814038380980492, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006818121764808893, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0064337290823459625, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004458404146134853, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013038409873843193, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013038409873843193, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2353736311197281, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22032004594802856, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21500888466835022, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1945924460887909, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11047416925430298, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10474340617656708, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12465565651655197, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11469022929668427, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11207050085067749, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09904181212186813, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09385137259960175, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06334730982780457, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0548708550632, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.052883025258779526, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.052417412400245667, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031664326786994934, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027024300768971443, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026840735226869583, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024668894708156586, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0243627168238163, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016444873064756393, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016146793961524963, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015719149261713028, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010248815640807152, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016444873064756393, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015719149261713028, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.15.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22524791955947876, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20293140411376953, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19342492520809174, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16860981285572052, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10574028640985489, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09612690657377243, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12584954500198364, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11484529078006744, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10847180336713791, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08974389731884003, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08314357697963715, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06479154527187347, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055560290813446045, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0513719841837883, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.050362471491098404, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03260799124836922, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027361592277884483, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02694959193468094, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024255851283669472, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023596666753292084, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017689934000372887, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01826529949903488, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016326546669006348, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013351255096495152, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017689934000372887, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016326546669006348, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17037123441696167, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15998123586177826, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15668031573295593, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14232157170772552, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08039315789937973, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07661008089780807, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08950010687112808, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08279174566268921, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0815240889787674, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07237701117992401, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06876222789287567, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04573877155780792, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.039835963398218155, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.038753800094127655, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03849697485566139, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02292131446301937, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020355990156531334, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02028127759695053, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018778139725327492, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018621345981955528, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012317475862801075, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01291555818170309, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011971675790846348, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009303523227572441, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018621345981955528, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018621345981955528, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23003628849983215, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21613354980945587, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21186406910419464, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19249799847602844, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10813024640083313, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1031210646033287, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1202707514166832, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1111050546169281, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10964374989271164, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09736725687980652, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09236262738704681, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06110837683081627, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.053095635026693344, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05173633620142937, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05141491815447807, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030548028647899628, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026410110294818878, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026316577568650246, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024189021438360214, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02398529462516308, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01593467965722084, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01561782881617546, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015476852655410767, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009923623874783516, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01593467965722084, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015476852655410767, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.15.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2554647624492645, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2265150099992752, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2151983380317688, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1895032376050949, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11817186325788498, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10652777552604675, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13957534730434418, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12832176685333252, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12231197953224182, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10009849071502686, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09397119283676147, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0716526135802269, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06174665689468384, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05709509924054146, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05595635622739792, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03615203872323036, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030090129002928734, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02970738522708416, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026555689051747322, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02580176293849945, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019844362512230873, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019702134653925896, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018403150141239166, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013960652984678745, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018403150141239166, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018403150141239166, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.14100594818592072, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1310347616672516, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12698090076446533, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11425241827964783, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06609834730625153, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06206667050719261, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07569222152233124, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06976344436407089, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06720125675201416, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.058712881058454514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0555838868021965, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.038526203483343124, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.033411700278520584, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.031756434589624405, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03136284649372101, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01927482709288597, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.016452593728899956, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.016293762251734734, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014968239702284336, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014719612896442413, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.010126147419214249, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.010244708508253098, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.009581905789673328, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006916005630046129, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.016452593728899956, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014968239702284336, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11900871992111206, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11066549271345139, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1069236472249031, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09627801179885864, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.055802226066589355, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05224599316716194, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06441224366426468, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05937979742884636, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05674755945801735, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04961233586072922, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04706636443734169, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03276025131344795, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028436876833438873, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02683536522090435, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026445601135492325, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016405053436756134, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013885894790291786, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013720578514039516, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012634833343327045, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012390942312777042, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008602675050497055, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008663215674459934, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008091975934803486, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005802031606435776, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016405053436756134, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016405053436756134, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24981537461280823, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23301199078559875, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22712671756744385, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.204799622297287, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11751338094472885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11107548326253891, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13281625509262085, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12211538106203079, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11935567110776901, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10470219701528549, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09908507764339447, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06755801290273666, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05838881805539131, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05624144524335861, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05571163445711136, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03374321013689041, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02862253040075302, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028437158092856407, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025956004858016968, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025617819279432297, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017378514632582664, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016925524920225143, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01658674143254757, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010494627989828587, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017378514632582664, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01658674143254757, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.16.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09822241216897964, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09237319231033325, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09058614820241928, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08274924755096436, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0464867427945137, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.044372230768203735, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05212896689772606, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04773677513003349, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.047030869871377945, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04205091670155525, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04037817195057869, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027048448100686073, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02347320318222046, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02290407195687294, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02276437357068062, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013902504928410053, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012698162347078323, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012646099552512169, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011874193325638771, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011797210201621056, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008133964613080025, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00885862484574318, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00797887984663248, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.007205276750028133, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013902504928410053, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013902504928410053, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17150543630123138, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1606856733560562, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15719786286354065, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1422320306301117, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08104026317596436, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07708881795406342, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09043215960264206, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08349523693323135, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08220931142568588, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07260926067829132, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06878486275672913, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04633808881044388, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04017100855708122, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.039059899747371674, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0388031043112278, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0232212133705616, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0204574353992939, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020388316363096237, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018789907917380333, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018628276884555817, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012481163255870342, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01289832778275013, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01212379615753889, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009203181602060795, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018628276884555817, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018628276884555817, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24088528752326965, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22574295103549957, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22104193270206451, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20004743337631226, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11348097026348114, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10800206661224365, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12667851150035858, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11677111685276031, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11512655019760132, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10168417543172836, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0963544175028801, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06453461199998856, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05588430538773537, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05439043045043945, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05403482913970947, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03232721611857414, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0278797410428524, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02777777798473835, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025457510724663734, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025235626846551895, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017035016790032387, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016675598919391632, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01653856784105301, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010836437344551086, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017035016790032387, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01653856784105301, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.16.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24268679320812225, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21995452046394348, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21111853420734406, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18681055307388306, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11326967179775238, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10416321456432343, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13235102593898773, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1213228777050972, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11631140857934952, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09804966300725937, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09194940328598022, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06777410209178925, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05860951170325279, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.054926734417676926, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.054033320397138596, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03416385501623154, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029258524999022484, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.0289466455578804, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02632278762757778, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025747807696461678, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018822697922587395, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01941114291548729, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01766195520758629, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014287818223237991, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01766195520758629, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01766195520758629, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11827549338340759, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11031404137611389, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10691886395215988, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09642314165830612, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05553468316793442, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05224832519888878, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06362154334783554, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05859823152422905, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.056419968605041504, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04953121393918991, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04698340222239494, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.032473690807819366, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02822137251496315, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02681976929306984, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02648516371846199, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0163059514015913, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014138447120785713, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013993950560688972, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012953950092196465, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012748470529913902, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00872274860739708, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009129881858825684, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008265003561973572, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006568365264683962, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0163059514015913, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0163059514015913, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10422194004058838, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09717018157243729, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09405890107154846, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08478151261806488, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.048876725137233734, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04591483250260353, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0563448928296566, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05185021460056305, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04965098947286606, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04361386224627495, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04142090305685997, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0286859143525362, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024843614548444748, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023487959057092667, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023159975185990334, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014365199953317642, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012176861986517906, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012037370353937149, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011119804345071316, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010915543884038925, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007561113219708204, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007611021865159273, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007118618115782738, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0051335059106349945, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014365199953317642, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014365199953317642, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23600785434246063, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22055795788764954, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21516460180282593, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19436120986938477, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11101162433624268, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10513991862535477, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12554503977298737, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11528635025024414, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11267255246639252, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09923455119132996, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09396189451217651, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06393163651227951, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05513785034418106, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053150493651628494, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.052673086524009705, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03192189708352089, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027100972831249237, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026926616206765175, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024663617834448814, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024349544197320938, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016502518206834793, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016096267849206924, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015759296715259552, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010095208883285522, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016502518206834793, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015759296715259552, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.17.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18054844439029694, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1607697606086731, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15415462851524353, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13510023057460785, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08473093062639236, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07685966044664383, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09847961366176605, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.08982766419649124, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08705000579357147, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07171289622783661, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06634063273668289, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05088720843195915, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04399748891592026, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04168204590678215, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.041138339787721634, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.025746379047632217, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023037321865558624, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02287111058831215, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020649658516049385, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020304379984736443, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014632859267294407, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016192574054002762, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013931188732385635, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012942520901560783, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014632859267294407, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014632859267294407, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17033620178699493, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15977144241333008, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1562897115945816, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14154386520385742, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08052115142345428, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07662249356508255, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08993300050497055, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.082944355905056, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0816672220826149, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07221287488937378, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06851315498352051, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04606644809246063, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03988705947995186, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03878674656152725, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03852742910385132, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023089589551091194, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02028249390423298, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020203260704874992, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01863497495651245, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01847323589026928, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012422955594956875, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012738246470689774, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012066642753779888, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00904006790369749, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01847323589026928, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01847323589026928, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2392924576997757, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2244521826505661, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2198106348514557, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19914086163043976, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11280936747789383, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10745948553085327, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12578395009040833, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11604388803243637, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11439955979585648, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10121352225542068, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09585950523614883, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0640430748462677, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05549515038728714, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05401875451207161, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05367068573832512, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.032005637884140015, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02758416347205639, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02748018503189087, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025193460285663605, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0249665267765522, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01670481078326702, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016343524679541588, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016202230006456375, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010410627350211143, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01670481078326702, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016202230006456375, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.17.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24640339612960815, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22225628793239594, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21262939274311066, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18835444748401642, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11496148258447647, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10514597594738007, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13509176671504974, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12380273640155792, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11828151345252991, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09921345114707947, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09323740750551224, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06943260133266449, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06000421941280365, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.055962853133678436, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05498725175857544, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.035100385546684265, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03021067939698696, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02986541949212551, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.027234606444835663, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026599984616041183, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.0195507500320673, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.02055419236421585, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018281424418091774, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015579046681523323, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018281424418091774, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015579046681523323, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11276041716337204, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10511764883995056, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10190540552139282, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09189226478338242, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05277211219072342, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04965471848845482, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06038317829370499, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.055696360766887665, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05361638963222504, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04710172489285469, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04463677108287811, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03078746423125267, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02670125663280487, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025384047999978065, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025067878887057304, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015415489673614502, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013211620040237904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013083428144454956, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012081588618457317, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011882023885846138, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008142221719026566, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00830800924450159, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007700307760387659, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005716321524232626, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015415489673614502, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015415489673614502, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09514141827821732, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08868540823459625, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0856882780790329, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07731452584266663, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04448414966464043, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04172486439347267, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05145691707730293, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.047473032027482986, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.045196570456027985, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.039738237857818604, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03772835060954094, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02615009807050228, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022714627906680107, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021380774676799774, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021056726574897766, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013085279613733292, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011079447343945503, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010935118421912193, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010123306885361671, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009916731156408787, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00688367523252964, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006941765081137419, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006459169089794159, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00466518010944128, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013085279613733292, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013085279613733292, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2305460125207901, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2154892534017563, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21005290746688843, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1897062063217163, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10830794274806976, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1024790108203888, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12280717492103577, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11279280483722687, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10994366556406021, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09682853519916534, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09169721603393555, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06245172396302223, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0539374016225338, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05185955762863159, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05137050896883011, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0311849657446146, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026495689526200294, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026309214532375336, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02412734180688858, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02381567470729351, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01615520380437374, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0158533975481987, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01538639236241579, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010065809823572636, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01615520380437374, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01538639236241579, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.18.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21926620602607727, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19384263455867767, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1843651682138443, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1562696099281311, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10308533906936646, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09337770938873291, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12059912085533142, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11072687059640884, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10594164580106735, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08421029895544052, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07692833244800568, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06262563169002533, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.054185137152671814, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05068296939134598, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.049837034195661545, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031766247004270554, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02799610234797001, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027716366574168205, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024566691368818283, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02404368296265602, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01803433895111084, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019759001210331917, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01699915900826454, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01571723259985447, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01803433895111084, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01571723259985447, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17225073277950287, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16154725849628448, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15805953741073608, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14318495988845825, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08137197047472, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07743366807699203, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09081082791090012, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08389484137296677, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.082525834441185, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0730685219168663, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06926818937063217, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04654702916741371, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04040217399597168, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03927312418818474, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03899756446480751, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02333921007812023, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02065424807369709, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020571717992424965, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019015351310372353, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01884840987622738, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0125941326841712, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013142755255103111, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01223019976168871, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009515481069684029, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0125941326841712, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0125941326841712, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2369222342967987, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22227951884269714, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2176935076713562, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19718819856643677, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11146628856658936, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10616502165794373, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12445239722728729, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11471301317214966, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11307403445243835, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10008469969034195, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09489419311285019, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0632370188832283, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05485335737466812, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05340421572327614, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05304821580648422, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031644728034734726, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02731902524828911, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02721535786986351, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024972476065158844, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024750186130404472, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01655765436589718, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01625337079167366, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016066035255789757, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010446581058204174, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01655765436589718, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016066035255789757, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.18.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25814539194107056, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.23066270351409912, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2198910266160965, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.19433453679084778, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.12005288153886795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10890847444534302, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14107324182987213, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12984278798103333, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12388211488723755, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.102597177028656, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09641649574041367, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07246082276105881, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06286850571632385, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.058391764760017395, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05730009078979492, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.036646973341703415, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03139632195234299, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.031015004962682724, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02808593027293682, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.027373280376195908, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.020378373563289642, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.02126934565603733, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0190413985401392, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015955420210957527, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015955420210957527, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015955420210957527, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10948725789785385, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10214193165302277, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09900978207588196, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08927328884601593, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.051237158477306366, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04818516597151756, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05874018743634224, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05418967083096504, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.052041418850421906, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04572893679141998, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04339772090315819, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029890771955251694, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025946106761693954, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024591581895947456, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024270493537187576, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014966176822781563, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012743107974529266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012603303417563438, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011635009199380875, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011433630250394344, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007857726886868477, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00793819222599268, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007406140211969614, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005346015095710754, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014966176822781563, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014966176822781563, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09324230253696442, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08701010793447495, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08405781537294388, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07584983855485916, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04358415678143501, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.040872957557439804, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05044379085302353, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04658571258187294, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04427812993526459, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03898489475250244, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03699544444680214, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02562917210161686, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022285958752036095, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020931454375386238, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02060519903898239, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012830804102122784, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010820934548974037, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010675682686269283, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009891278110444546, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009681922383606434, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0067330170422792435, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00674933847039938, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006303682457655668, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004486752208322287, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012830804102122784, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012830804102122784, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22548627853393555, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21073833107948303, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20532287657260895, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18534457683563232, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10585719347000122, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1001695990562439, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12035279721021652, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11040734499692917, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10742858797311783, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09464861452579498, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08967551589012146, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.061221443116664886, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05280684679746628, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05071086063981056, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.050220973789691925, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030583398416638374, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02596837468445301, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025770697742700577, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023658333346247673, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023337500169873238, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015868982300162315, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015611290000379086, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015084086917340755, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009988443925976753, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015868982300162315, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015084086917340755, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.19.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21187177300453186, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18709519505500793, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17392520606517792, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1484309434890747, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09887672960758209, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08715340495109558, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.124130979180336, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11311466991901398, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1026398316025734, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0826430693268776, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07644109427928925, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06412152945995331, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055384114384651184, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04881582409143448, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04715784639120102, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032325927168130875, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02727898582816124, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026643982157111168, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02437148056924343, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023394692689180374, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01797272078692913, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.020045081153512, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01589181087911129, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015874551609158516, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01797272078692913, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015874551609158516, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17300869524478912, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16220475733280182, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15865109860897064, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14369350671768188, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08167800307273865, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07771305739879608, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09127704799175262, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08434761315584183, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08282213658094406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07334896922111511, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0694618970155716, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04671545326709747, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.040605101734399796, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03942493721842766, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03913412615656853, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02341076172888279, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020741192623972893, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020656242966651917, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01910625398159027, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018935194239020348, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012596390210092068, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013233909383416176, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012211297638714314, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009598638862371445, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012596390210092068, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012596390210092068, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23249468207359314, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21796460449695587, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.213315948843956, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19331051409244537, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10931842029094696, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10405082255601883, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12196766585111618, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11255557835102081, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11083895713090897, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09814411401748657, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0928594246506691, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06198520585894585, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05380040779709816, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05233349651098251, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0519789420068264, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030958572402596474, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02672417275607586, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02661886438727379, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0244225412607193, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02420121803879738, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01604538783431053, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015852034091949463, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015528876334428787, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010104706510901451, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01604538783431053, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015528876334428787, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.19.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.26301392912864685, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2341313362121582, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.22267146408557892, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.19596515595912933, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.12232545763254166, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.11053105443716049, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14467288553714752, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13279931247234344, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.126401886343956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10390093922615051, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09749127179384232, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07444313913583755, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06422805041074753, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0594099685549736, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05823918431997299, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.037724681198596954, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.031792014837265015, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.03138211369514465, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.028269434347748756, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.027496205642819405, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.021123187616467476, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.021393517032265663, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.019661713391542435, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015819285064935684, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015819285064935684, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015819285064935684, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1141294315457344, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10637537389993668, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10319823026657104, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09306902438402176, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0533064603805542, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05019393563270569, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.060957666486501694, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05617382377386093, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.054146844893693924, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04756473749876022, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04507802799344063, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03098013810813427, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026865951716899872, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025564108043909073, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025247184559702873, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015495041385293007, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013181358575820923, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013053838163614273, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012022972106933594, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01182212121784687, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008092456497251987, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00811051670461893, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007650850806385279, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005358618218451738, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015495041385293007, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015495041385293007, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09569858014583588, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08923573791980743, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08627773821353912, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0778677761554718, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04466146603226662, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04193072393536568, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05150366947054863, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.047591712325811386, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04538407176733017, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03992388769984245, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03791394457221031, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02613971382379532, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02275758422911167, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02142256870865822, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021103493869304657, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01307027880102396, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011047501116991043, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010905916802585125, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010087928734719753, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00988141167908907, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006832944694906473, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006845237221568823, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006419584155082703, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0045050885528326035, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01307027880102396, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01307027880102396, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22313682734966278, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20836254954338074, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20292481780052185, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18318751454353333, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10459408909082413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09886348247528076, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11885031312704086, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10913138091564178, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10619296133518219, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09348226338624954, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08861340582370758, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06045917794108391, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.052261289209127426, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05011111497879028, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.049590639770030975, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030258290469646454, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025680992752313614, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025479448959231377, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02341344580054283, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023075995966792107, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01576712168753147, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015513919293880463, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014982176013290882, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009992346167564392, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01576712168753147, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014982176013290882, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.20.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.224090114235878, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19682396948337555, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1881599873304367, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1615641564130783, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10443869978189468, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09433240443468094, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12067240476608276, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11050911247730255, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10756448656320572, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08653359860181808, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07824106514453888, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06220356374979019, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.053828250616788864, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.051053982228040695, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05041993036866188, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03133060038089752, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027820158749818802, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027647174894809723, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024491705000400543, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024075236171483994, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01746184192597866, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019112272188067436, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01663084886968136, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014915158972144127, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01746184192597866, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014915158972144127, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17226245999336243, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16156421601772308, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1579720377922058, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14312946796417236, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08124042302370071, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07732009887695312, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09070884436368942, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08383465558290482, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0823976993560791, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07294923067092896, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06914357095956802, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04642404243350029, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.040372688323259354, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03921375796198845, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03893367946147919, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023257499560713768, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020656786859035492, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020572306588292122, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019030723720788956, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01886330544948578, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012514837086200714, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013208742253482342, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012135418131947517, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009609734639525414, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012514837086200714, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012514837086200714, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22874252498149872, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21449585258960724, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20998230576515198, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1901400238275528, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1074715256690979, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10234984010457993, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1198519766330719, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11062578856945038, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10901107639074326, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09645765274763107, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09133981913328171, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0609687902033329, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05285901203751564, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05141833424568176, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05108637735247612, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03046400286257267, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026232687756419182, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026125263422727585, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02395777963101864, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02373834326863289, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015814922749996185, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015505884774029255, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015306195244193077, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0098061328753829, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015814922749996185, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015306195244193077, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.20.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2637363076210022, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.234172061085701, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2223966419696808, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.19563470780849457, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.12270773947238922, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.110558420419693, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14531023800373077, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13328590989112854, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1267935037612915, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10393049567937851, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09759290516376495, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07494403421878815, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06460024416446686, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.059704698622226715, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05852315574884415, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03812374174594879, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03212788328528404, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.03171200305223465, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02854897268116474, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02778114192187786, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.02155747450888157, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.02181357890367508, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.020109040662646294, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01636374555528164, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01636374555528164, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01636374555528164, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12102635949850082, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11283659934997559, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10959647595882416, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09881451725959778, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05651155859231949, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.053274016827344894, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06437348574399948, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05939917638897896, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.057394154369831085, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05046054348349571, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04785038158297539, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03276472166180611, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028470637276768684, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02715170383453369, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026839232072234154, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016412271186709404, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014108400791883469, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01398186944425106, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012900427915155888, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012704049237072468, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008640458807349205, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008824792690575123, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008204442448914051, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006050151772797108, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016412271186709404, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016412271186709404, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09744428098201752, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0908668041229248, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0877588540315628, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07922638207674026, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04544295370578766, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04260074719786644, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05258338153362274, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04858780652284622, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04616551846265793, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04064302518963814, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03863104060292244, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026687802746891975, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02322458103299141, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021808264777064323, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021467357873916626, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013358701951801777, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011266275309026241, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01111337635666132, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010298370383679867, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01007882785052061, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007003250997513533, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007022035773843527, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006559677422046661, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0046491543762385845, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013358701951801777, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013358701951801777, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22734153270721436, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21243758499622345, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20716866850852966, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18699751794338226, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10662977397441864, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10091982036828995, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12077515572309494, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11090607941150665, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1082245483994484, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09528545290231705, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09031233936548233, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.061453670263290405, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.053072720766067505, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05104772374033928, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.050556402653455734, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030730729922652245, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026133578270673752, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025950821116566658, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023821182548999786, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02350212074816227, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01597360521554947, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015714749693870544, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015216534957289696, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010082654654979706, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01597360521554947, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015216534957289696, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.21.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21894985437393188, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19104136526584625, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18085436522960663, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15525847673416138, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10261116176843643, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.091684490442276, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12121807038784027, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1107049360871315, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10614819824695587, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08435042202472687, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07745827734470367, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0629715621471405, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.053838100284338, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05017152801156044, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04928266629576683, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03175656870007515, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02709745243191719, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026807039976119995, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02362627163529396, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023060990497469902, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017641142010688782, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018469246104359627, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016545768827199936, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014028922654688358, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017641142010688782, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016545768827199936, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1696629673242569, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1590394824743271, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15561267733573914, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1409623622894287, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08001569658517838, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.076132632791996, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08927921950817108, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08251835405826569, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0811527892947197, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07181139290332794, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06808106601238251, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0456879660487175, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0397283174097538, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03860609233379364, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.038343075662851334, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022905822843313217, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020324714481830597, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0202470812946558, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01872209645807743, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018561258912086487, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012343932874500751, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012968732044100761, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01198316179215908, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009419875219464302, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018561258912086487, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018561258912086487, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2262948453426361, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21220122277736664, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2077980637550354, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18824493885040283, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10631439089775085, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10125935077667236, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11839289963245392, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10936396569013596, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1078263446688652, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09543924778699875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09033660590648651, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.060229066759347916, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05225614458322525, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05086648091673851, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05054302513599396, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030102089047431946, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025944411754608154, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025843823328614235, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023696165531873703, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02348565310239792, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01564561389386654, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01532519981265068, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015168188139796257, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009695842862129211, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01564561389386654, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015168188139796257, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.21.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.26178494095802307, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.23134896159172058, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21910728514194489, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.19275587797164917, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1212029829621315, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10881128162145615, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14525407552719116, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1325155794620514, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12558867037296295, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10237977653741837, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09629108756780624, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0743078961968422, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06395918130874634, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.058845680207014084, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05759156867861748, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03753369301557541, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03142806142568588, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.030989037826657295, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02781335450708866, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026979221031069756, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.020762238651514053, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.021136363968253136, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01914897933602333, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015508841723203659, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015508841723203659, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015508841723203659, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12467021495103836, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11640045046806335, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11314515024423599, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10214366763830185, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05821622163057327, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.054944269359111786, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06620930135250092, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.061080511659383774, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0591183565557003, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05205072835087776, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0493486262857914, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.033640529960393906, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029207691550254822, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027906576171517372, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027593975886702538, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016825426369905472, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014380579814314842, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014259452931582928, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013131100684404373, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01293633971363306, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008790405467152596, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008816801942884922, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008345131762325764, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005831282120198011, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016825426369905472, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016825426369905472, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09849352389574051, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09193525463342667, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08892438560724258, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08031781017780304, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04592612758278847, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04313616454601288, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05290551111102104, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04887453094124794, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04664983972907066, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.041097696870565414, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.039041221141815186, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02682000771164894, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023346146568655968, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02201118879020214, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021693356335163116, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013407677412033081, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011329246684908867, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01118555199354887, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010349240154027939, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01014913059771061, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006994122639298439, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006985941901803017, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00657252361997962, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004555682186037302, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013407677412033081, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013407677412033081, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22585424780845642, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21122592687606812, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20577554404735565, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18604795634746552, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10593971610069275, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10024046897888184, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1203872486948967, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11055771261453629, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10755044966936111, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09488844871520996, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09000653028488159, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06122959405183792, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.052966855466365814, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05075506120920181, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05022638291120529, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030630193650722504, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025976721197366714, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025763262063264847, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023697538301348686, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02336515486240387, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015936149284243584, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01563531719148159, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015153172425925732, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009989358484745026, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015936149284243584, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015153172425925732, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.22.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22477097809314728, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20088841021060944, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19305063784122467, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16579829156398773, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10456588119268417, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09533286094665527, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12086689472198486, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11024710536003113, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10743855684995651, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08782295137643814, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0803631991147995, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06251204013824463, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05395107343792915, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05143101513385773, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.050814490765333176, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03196752071380615, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028351914137601852, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028205594047904015, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025276219472289085, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02489870972931385, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01859442889690399, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019843148067593575, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017874276265501976, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015860665589571, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01859442889690399, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015860665589571, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1658368855714798, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15545272827148438, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15207314491271973, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13781780004501343, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07810958474874496, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07433610409498215, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08711722493171692, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08051151782274246, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07921630144119263, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07010409981012344, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06652757525444031, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04453129693865776, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.038696758449077606, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.037612900137901306, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03736359626054764, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022291842848062515, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019695576280355453, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.019615834578871727, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01811719313263893, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01795862801373005, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011920483782887459, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01241510920226574, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01156399305909872, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008848115801811218, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01811719313263893, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01795862801373005, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22217851877212524, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20843730866909027, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20408231019973755, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1849612146615982, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1043676882982254, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09941913932561874, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11617051064968109, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10733090341091156, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10583289712667465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09372653067111969, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08879030495882034, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05909484252333641, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.051280491054058075, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04992685094475746, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04960724338889122, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029507586732506752, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025455499067902565, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02535717561841011, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023254122585058212, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023049183189868927, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01529061608016491, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01501932367682457, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014824803918600082, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009477447718381882, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01529061608016491, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01501932367682457, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.22.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25666913390159607, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22630520164966583, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2138863205909729, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18809276819229126, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11874359101057053, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10611136257648468, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1422899067401886, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1302100121974945, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12311401218175888, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10003263503313065, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09404738992452621, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07283655554056168, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0628243237733841, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05757864937186241, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05629410222172737, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.0369674488902092, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030657628551125526, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.030202126130461693, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02704613469541073, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02619929425418377, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.020585456863045692, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020531604066491127, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018983379006385803, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014912202954292297, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014912202954292297, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014912202954292297, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11994920670986176, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11206099390983582, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10894693434238434, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09840352833271027, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05603615939617157, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05291035398840904, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06374339014291763, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05877097696065903, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05691169947385788, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05016135796904564, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04767906665802002, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03240333870053291, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028103293851017952, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026862025260925293, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02656402997672558, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016210399568080902, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013820755295455456, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013700389303267002, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012624531053006649, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012431679293513298, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008472424000501633, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008438009768724442, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008056821301579475, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005537680350244045, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016210399568080902, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016210399568080902, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09767933189868927, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09120392799377441, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.088297039270401, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07979529350996017, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.045542702078819275, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04283280670642853, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05232928320765495, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.048390135169029236, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04625709354877472, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04077128693461418, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03874674439430237, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02653476968407631, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023109033703804016, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021846480667591095, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02153504267334938, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013273272663354874, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0112504493445158, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01111624762415886, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01028390321880579, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010090261697769165, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006936856545507908, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006941997911781073, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006536737084388733, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004553449340164661, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013273272663354874, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013273272663354874, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.231725811958313, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21690641343593597, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21157604455947876, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19148102402687073, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1087784618139267, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1030721515417099, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12306498736143112, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11305296421051025, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11039942502975464, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09749231487512589, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09248364716768265, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06261558085680008, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05411609262228012, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.052098143845796585, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.051612552255392075, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03130427747964859, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026636475697159767, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026455707848072052, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024306733161211014, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02398483082652092, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01626395806670189, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01593879424035549, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015512236393988132, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010145734995603561, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01626395806670189, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015512236393988132, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.23.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2250690907239914, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2026754468679428, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19378405809402466, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16807587444782257, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10550083965063095, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09620274603366852, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12464357167482376, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11363007873296738, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.108436219394207, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08969538658857346, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08270090818405151, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06400062888860703, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05487319827079773, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0511418879032135, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05025295168161392, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0320434495806694, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027043836191296577, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026712553575634956, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023958511650562286, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023378878831863403, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017142217606306076, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017748858779668808, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01592063531279564, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012770533561706543, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017142217606306076, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01592063531279564, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16956675052642822, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15902648866176605, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15564948320388794, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.141034796833992, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07995366305112839, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0761154517531395, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0890619084239006, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08234889060258865, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08109207451343536, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07181763648986816, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06812381744384766, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0455620251595974, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.039642926305532455, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03855545073747635, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03830096125602722, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022830737754702568, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020267855376005173, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020190175622701645, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018668100237846375, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018506817519664764, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01225806213915348, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012881331145763397, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011905229650437832, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00931162852793932, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018506817519664764, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018506817519664764, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22653797268867493, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2123183161020279, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20797373354434967, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18848569691181183, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1063804179430008, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10135180503129959, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1184597909450531, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1093907505273819, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10789903253316879, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09555640071630478, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09053914994001389, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.060218695551157, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05227182060480118, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0509168803691864, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.050594523549079895, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030095931142568588, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025995494797825813, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025904826819896698, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023763490840792656, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02356194145977497, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0156550295650959, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015396347269415855, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015192314982414246, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009805025532841682, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0156550295650959, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015192314982414246, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.23.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.26041364669799805, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22941914200782776, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21645313501358032, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.19048479199409485, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1205497607588768, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10751091688871384, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14476346969604492, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1326780915260315, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12508687376976013, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.1014256700873375, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09561388939619064, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07437577843666077, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06398876756429672, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.058416929095983505, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05706174671649933, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03768431395292282, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.0310080423951149, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.030517693608999252, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02729181945323944, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.0263955257833004, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.020969295874238014, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020684024319052696, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.019281772896647453, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014870456419885159, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014870456419885159, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014870456419885159, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12928223609924316, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12081564217805862, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.117652527987957, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10634417086839676, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06051027774810791, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.057250551879405975, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06864357739686966, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06323537975549698, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06143637374043465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05421065911650658, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0515054315328598, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03494788333773613, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03029448352754116, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.029048403725028038, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.028751913458108902, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0175057053565979, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015034209936857224, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014919298700988293, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013760850764811039, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01357333268970251, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0092002023011446, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00928631704300642, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00878572091460228, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006278171669691801, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0175057053565979, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014919298700988293, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10394137352705002, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09708959609270096, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09413516521453857, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08510508388280869, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04854413866996765, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04572953283786774, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05552509427070618, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05135868117213249, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04929124563932419, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04348192363977432, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.041304316371679306, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028180265799164772, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024547774344682693, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023272491991519928, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022967232391238213, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014092444442212582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01198960468173027, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01185489259660244, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010960988700389862, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010766212828457355, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007366131525486708, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007375071756541729, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0069680907763540745, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004841877147555351, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014092444442212582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014092444442212582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2432919442653656, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22779104113578796, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22244679927825928, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20145970582962036, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11435744911432266, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10849198698997498, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1288825422525406, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.118560791015625, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11600009351968765, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10251372307538986, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09728790819644928, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0655263364315033, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05667766183614731, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.054706014692783356, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.054243072867393494, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03273729234933853, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02791871689260006, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027749555185437202, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025479795411229134, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02517923340201378, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01693328469991684, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01660015806555748, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016208553686738014, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01047372817993164, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01693328469991684, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016208553686738014, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.24.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22447404265403748, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19761104881763458, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18851670622825623, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15796759724617004, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10456600785255432, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09434483200311661, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12169327586889267, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11110904812812805, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10777599364519119, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0843811109662056, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07641665637493134, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06276731193065643, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05396141856908798, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05097981542348862, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05026715248823166, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03167019039392471, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027491897344589233, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027283838018774986, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023706870153546333, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023229548707604408, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017628265544772148, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01860402338206768, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016710950061678886, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014195293188095093, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017628265544772148, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016710950061678886, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16892777383327484, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1583947390317917, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15502063930034637, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14056669175624847, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07957926392555237, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07574481517076492, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08867888152599335, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08197415620088577, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08072128891944885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07148942351341248, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0678190290927887, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04529954493045807, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.039398226886987686, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.038327693939208984, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03807617723941803, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02269834280014038, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02006629668176174, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01999529078602791, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018466118723154068, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018307985737919807, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012129688635468483, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012646902352571487, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011778268963098526, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00902275275439024, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018466118723154068, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018307985737919807, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22465386986732483, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21077053248882294, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2063780426979065, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18709826469421387, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1055130660533905, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10051609575748444, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11748287826776505, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10846395790576935, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10701892524957657, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0947909876704216, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08985263854265213, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05974588915705681, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0518282912671566, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0504796989262104, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05017035827040672, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02988569065928459, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025763627141714096, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025668835267424583, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023546438664197922, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02334398590028286, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015594114549458027, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015227730385959148, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01514244545251131, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009657390415668488, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015594114549458027, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01514244545251131, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.24.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25021499395370483, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2207595705986023, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2087266445159912, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1834646314382553, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11561285704374313, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10336746275424957, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13819299638271332, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12669944763183594, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11980429291725159, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09737127274274826, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0916047915816307, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07094048708677292, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06101130321621895, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05594046041369438, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05470128729939461, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03587067499756813, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02958054281771183, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029135866090655327, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02602347731590271, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02519294247031212, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01977662369608879, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01955517567694187, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018185054883360863, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013924181461334229, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018185054883360863, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018185054883360863, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12453675270080566, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11648430675268173, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11337146162986755, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1025134027004242, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05823998525738716, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.055094163864851, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06619038432836533, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06095580384135246, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.059088654816150665, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.052197251468896866, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04965990409255028, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03365197405219078, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029173007234930992, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02792203612625599, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027625445276498795, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016836289316415787, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014371715486049652, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014252652414143085, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013149804435670376, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012960361316800117, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008794821798801422, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008773382753133774, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008386585861444473, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005777980666607618, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016836289316415787, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016836289316415787, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10079353302717209, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09422557801008224, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09129871428012848, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08259622007608414, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.047072961926460266, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04431874677538872, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05403923988342285, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.049925826489925385, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04780472442507744, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04219077527523041, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.040122345089912415, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02744581364095211, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023868829011917114, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022582687437534332, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022275734692811966, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013721303083002567, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011648785322904587, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011516859754920006, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010663222521543503, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010467571206390858, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00719370786100626, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007206416223198175, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006789067760109901, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004763318691402674, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013721303083002567, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013721303083002567, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2375522255897522, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22257031500339508, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21750688552856445, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1971195489168167, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1115991547703743, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10599424690008163, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12549646198749542, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11549254506826401, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11318419873714447, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1001594215631485, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09507544338703156, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06386958062648773, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05521062761545181, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05339762195944786, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.052963148802518845, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031884096562862396, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027225350961089134, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027067342773079872, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024858364835381508, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024581676349043846, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01648830622434616, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01614033617079258, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015802912414073944, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010150712914764881, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01648830622434616, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015802912414073944, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.25.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2280530035495758, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20269346237182617, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1945985108613968, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16052021086215973, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10732035338878632, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09799452126026154, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1228659600019455, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11244328320026398, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10995803028345108, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08715979009866714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0762949213385582, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06355255097150803, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.054726697504520416, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05242101475596428, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05188348516821861, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032086942344903946, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028381429612636566, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02822389453649521, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0245892982929945, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0242224782705307, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018057113513350487, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01919584535062313, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017354244366288185, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014850298874080181, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018057113513350487, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017354244366288185, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17397452890872955, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16320399940013885, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15973296761512756, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14492247998714447, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08199188113212585, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07808023691177368, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09133166819810867, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08437488228082657, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0831487774848938, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07365596294403076, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06987205147743225, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04659431055188179, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04052693769335747, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03944142535328865, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03918808698654175, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023365134373307228, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020565060898661613, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020483573898673058, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018898826092481613, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018741769716143608, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012473388575017452, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012821316719055176, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012121771462261677, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008993232622742653, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012473388575017452, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012473388575017452, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23150701820850372, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21726001799106598, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21284368634223938, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19298575818538666, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1088169664144516, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10371846705675125, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12122292071580887, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11179685592651367, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11031809449195862, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09780089557170868, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09275668114423752, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.061625007539987564, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05344065651297569, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.052086904644966125, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05176194757223129, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030871009454131126, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02660210430622101, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02650527097284794, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024325571954250336, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02411743625998497, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016200043261051178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015749065205454826, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01575094647705555, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01002692710608244, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016200043261051178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015749065205454826, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.25.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2562217712402344, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22533965110778809, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2127717286348343, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1866443008184433, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11842165142297745, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10556991398334503, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1411571502685547, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1299048662185669, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.122874416410923, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09930619597434998, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09345883131027222, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0723937526345253, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06286253780126572, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.057512067258358, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.056175168603658676, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.036560606211423874, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030763257294893265, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.03029314987361431, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02710486575961113, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026236295700073242, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01975863054394722, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020780880004167557, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018002254888415337, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015258450992405415, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018002254888415337, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015258450992405415, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12468119710683823, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11668556183576584, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11357598751783371, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1028529554605484, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.058337073773145676, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.055236030369997025, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06616181135177612, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06102008745074272, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05919115990400314, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05234222859144211, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04979473352432251, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03367210924625397, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029194507747888565, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027977801859378815, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027685154229402542, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016853773966431618, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014409158378839493, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014295823872089386, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013197369873523712, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01301183458417654, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008826487697660923, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00881859753280878, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008428013883531094, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005837834905833006, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016853773966431618, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016853773966431618, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10379455238580704, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09712405502796173, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0943027138710022, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08531545847654343, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.048517435789108276, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0457710362970829, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0554194450378418, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.051182277500629425, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04925009235739708, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04354183375835419, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04137043282389641, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02814851514995098, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024468638002872467, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02325345203280449, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022964656352996826, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01407418679445982, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011953229084610939, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011831807903945446, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01094076782464981, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010756044648587704, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007347827777266502, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007319708354771137, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006962879560887814, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004774936009198427, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01407418679445982, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01407418679445982, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23888258635997772, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22397829592227936, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21899141371250153, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.198361337184906, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11223294585943222, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10672923177480698, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12588687241077423, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11586994677782059, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11382152885198593, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10072530061006546, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09557255357503891, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0639888346195221, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055359579622745514, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053643643856048584, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05323071777820587, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031944479793310165, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02726845256984234, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02712543122470379, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02488577924668789, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02462906949222088, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016438214108347893, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0160209983587265, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015785004943609238, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009915968403220177, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016438214108347893, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015785004943609238, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.26.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23553191125392914, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20096664130687714, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18947163224220276, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15875272452831268, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10923875868320465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09651736170053482, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12820161879062653, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11718541383743286, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1134924367070198, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08593492954969406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07848821580410004, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06635700166225433, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05717874690890312, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05363808199763298, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0527818463742733, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03380163013935089, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02940482459962368, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02918068878352642, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025122705847024918, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02458634413778782, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.019499752670526505, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.02048572339117527, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.018478188663721085, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01610858365893364, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.018478188663721085, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01610858365893364, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17949700355529785, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16843242943286896, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16498279571533203, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1496669352054596, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0846206471323967, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08064837753772736, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09407385438680649, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08701533079147339, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08581794053316116, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07606444507837296, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07210419327020645, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04799240827560425, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.041755311191082, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04066600650548935, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04040468856692314, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0240060742944479, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0211233738809824, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021050408482551575, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019407227635383606, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019244037568569183, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01268388144671917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013067308813333511, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012326788157224655, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009052099660038948, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01268388144671917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01268388144671917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.234018474817276, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21968688070774078, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21527217328548431, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19525007903575897, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11000670492649078, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10491859167814255, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12217139452695847, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11300097405910492, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11154364049434662, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0989416092634201, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09373274445533752, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.062109820544719696, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05397715047001839, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05261752009391785, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.052296947687864304, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03101557493209839, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026818785816431046, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02673032134771347, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024523619562387466, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024316998198628426, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016023488715291023, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01579175889492035, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015560408122837543, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009944099001586437, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016023488715291023, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015560408122837543, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.26.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25736886262893677, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2252822071313858, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21202467381954193, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1861039698123932, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11873206496238708, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10535559058189392, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14236867427825928, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1309228241443634, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12335143983364105, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09921414405107498, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09328609704971313, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07281647622585297, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06303437799215317, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05745723471045494, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05608983337879181, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03662373870611191, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03036743402481079, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029868196696043015, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026554854586720467, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025631053373217583, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019704870879650116, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020113380625844002, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017908766865730286, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014245158061385155, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017908766865730286, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017908766865730286, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12170623242855072, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11390745639801025, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11077440530061722, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10016006976366043, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05689087137579918, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05380766838788986, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0647549033164978, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05969083309173584, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.057746414095163345, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.051042910665273666, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.048583440482616425, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.032986633479595184, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028576204553246498, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027311554178595543, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02700771577656269, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016509640961885452, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014111857861280441, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013985414989292622, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0129312202334404, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01273967046290636, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008672947995364666, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008715158328413963, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008263392373919487, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005838440265506506, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016509640961885452, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016509640961885452, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10212846845388412, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09553574025630951, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09256552904844284, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08374445885419846, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04771143198013306, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.044920291751623154, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.054766785353422165, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.050610657781362534, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04842661693692207, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04280799627304077, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04072457551956177, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027799835428595543, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02419644594192505, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0228736512362957, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022558685392141342, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01389993354678154, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011777261272072792, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0116428779438138, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010779941454529762, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010577572509646416, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007267131470143795, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007250823546200991, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006856787018477917, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004747434053570032, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01389993354678154, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01389993354678154, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23880484700202942, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2239862084388733, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21889834105968475, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1982313096523285, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11212876439094543, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10660039633512497, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12604142725467682, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11602161824703217, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11373792588710785, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10071168094873428, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09561421722173691, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06411276757717133, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055387113243341446, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053631555289030075, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05320002883672714, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031979843974113464, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02730412222445011, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027161719277501106, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024938475340604782, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024669315665960312, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016483355313539505, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016118325293064117, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015807470306754112, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01006852276623249, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016483355313539505, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015807470306754112, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.27.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22818726301193237, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19848479330539703, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1887759268283844, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15742266178131104, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10615896433591843, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09493374824523926, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12346629053354263, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11280295252799988, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10972271859645844, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08544152975082397, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0756591409444809, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06375882774591446, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05470532551407814, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05173312872648239, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05100523680448532, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032261885702610016, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027788741514086723, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027598470449447632, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023853758350014687, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023392928764224052, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018076831474900246, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018663842231035233, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017175935208797455, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014114209450781345, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018076831474900246, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017175935208797455, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18736429512500763, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17585963010787964, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1722542941570282, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15634006261825562, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08829919993877411, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08414839208126068, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09836159646511078, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09077959507703781, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08952603489160538, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0793764740228653, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07539360225200653, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05019206553697586, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.043590396642684937, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.042462896555662155, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.042206861078739166, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025164784863591194, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022145602852106094, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022068262100219727, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02037121169269085, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020202884450554848, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013473308645188808, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013812457211315632, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013113845139741898, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009711825288832188, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013473308645188808, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013473308645188808, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23536786437034607, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2209719866514206, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2165447324514389, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19638854265213013, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11063337326049805, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10550807416439056, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12295190244913101, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11361229419708252, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11215993016958237, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09947808086872101, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09437134861946106, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06248737871646881, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0543232224881649, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05297113582491875, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05264807492494583, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03127489984035492, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02705940417945385, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026964358985424042, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024761874228715897, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02455255202949047, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01631278172135353, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01603417471051216, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015857426449656487, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010241085663437843, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01631278172135353, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015857426449656487, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.27.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2580714225769043, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.224877268075943, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2106592059135437, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18430449068546295, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11887285858392715, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10478288680315018, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14366696774959564, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13221970200538635, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1237262710928917, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09885066002607346, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09291113913059235, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07352811843156815, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06358539313077927, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05750083550810814, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.055983271449804306, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03691147640347481, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030307946726679802, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02975253015756607, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02639099955558777, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025375280529260635, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019827552139759064, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020069964230060577, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017887985333800316, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014049013145267963, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017887985333800316, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017887985333800316, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12954895198345184, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12152229994535446, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11844341456890106, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10752727836370468, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06063953414559364, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.057510729879140854, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06873929500579834, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06339821964502335, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0614662729203701, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05461942031979561, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0520898662507534, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0349578894674778, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03031298704445362, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.029067713767290115, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02876727469265461, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01748562976717949, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014944866299629211, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014825265854597092, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013729206286370754, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01354022417217493, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009133406914770603, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009099137037992477, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008720926009118557, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005971436854451895, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01748562976717949, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014944866299629211, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10623858869075775, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09965406358242035, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09666158258914948, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08768357336521149, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0496685728430748, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04687608778476715, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.056812554597854614, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05259186029434204, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05037667974829674, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04473193362355232, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04259870573878288, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028827998787164688, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02512216754257679, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023813197389245033, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023493563756346703, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014424387365579605, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012268560007214546, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01213187724351883, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011266734451055527, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01107146218419075, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007547808811068535, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007558936253190041, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007141314912587404, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004974730312824249, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014424387365579605, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014424387365579605, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23896324634552002, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22464440762996674, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21986103057861328, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19986492395401, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11227202415466309, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10690715909004211, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12592720985412598, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11602719128131866, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11378339678049088, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10128628462553024, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09640976786613464, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06405210494995117, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0554356575012207, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053688712418079376, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05326645076274872, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03196176141500473, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02735977992415428, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027211032807826996, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025090688839554787, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02482929453253746, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016506768763065338, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01617491990327835, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015841232612729073, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010139538906514645, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016506768763065338, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015841232612729073, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2007496953010559, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17926497757434845, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17239691317081451, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1465040147304535, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09467453509569168, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.086678646504879, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10808888077735901, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09882991015911102, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09691856801509857, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07787688076496124, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0699780061841011, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.055849362164735794, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.048086170107126236, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04622459039092064, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04578588157892227, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028167061507701874, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025000659748911858, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02488524466753006, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02182304859161377, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021546831354498863, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015775110572576523, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016870860010385513, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015207978896796703, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013040545396506786, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015775110572576523, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015207978896796703, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1695554554462433, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1596175730228424, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15645000338554382, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14252394437789917, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07993729412555695, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07630857080221176, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08879108726978302, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08212035149335861, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08100501447916031, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07220586389303207, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06876027584075928, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04531235247850418, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03944243863224983, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.038442667573690414, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03820106387138367, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022708049044013023, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020048469305038452, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.019975924864411354, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018511610105633736, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018366148695349693, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012153911404311657, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012500368058681488, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011834665201604366, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008786765858530998, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018511610105633736, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018366148695349693, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2288292944431305, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21552547812461853, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.211442768573761, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19265885651111603, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10757128894329071, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10282100737094879, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.119562529027462, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11039159446954727, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10900324583053589, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0972389206290245, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09261197596788406, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06075771898031235, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.052741002291440964, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.051471155136823654, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05116899311542511, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030428700149059296, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026282569393515587, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026198875159025192, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02415766380727291, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02396404929459095, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016001161187887192, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015553710982203484, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015587306581437588, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009916671551764011, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016001161187887192, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015553710982203484, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.28.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24912138283252716, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21497933566570282, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1985856145620346, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17390407621860504, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11473353207111359, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09908846020698547, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14194218814373016, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13087604939937592, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11987803131341934, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09496218711137772, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08970705419778824, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07281775027513504, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06377024948596954, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05618131160736084, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05427287891507149, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03686891496181488, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03082653321325779, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.03008178621530533, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.027182113379240036, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025956876575946808, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.02018687315285206, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.02203870192170143, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017812354490160942, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.016806451603770256, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017812354490160942, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.016806451603770256, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.127280130982399, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11958565562963486, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11665066331624985, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10600729286670685, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.059556301683187485, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05658785626292229, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0673438236117363, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06213446334004402, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.060391616076231, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05376888066530228, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05125786364078522, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03425274416804314, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029712552204728127, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.028544893488287926, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.028271237388253212, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017136916518211365, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014697522856295109, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014584203250706196, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013521979562938213, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013347416184842587, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008981497958302498, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00896475836634636, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008599686436355114, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005917567294090986, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017136916518211365, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017136916518211365, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10698577761650085, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10048292577266693, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09777934104204178, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08883678168058395, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05002458021044731, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04737485572695732, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.056902721524238586, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05257471650838852, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05071372166275978, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04512612149119377, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04299614578485489, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02887747623026371, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02511942945420742, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023958291858434677, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02367439493536949, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014431113377213478, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012315131723880768, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012197795324027538, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01131841354072094, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011146841570734978, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007532413583248854, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0075097414664924145, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007163133937865496, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004897205624729395, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014431113377213478, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014431113377213478, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23796962201595306, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22394995391368866, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21926413476467133, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19959641993045807, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11165380477905273, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10656584799289703, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12490014731884003, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11509441584348679, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11316317319869995, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1008857935667038, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09608961641788483, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06346844881772995, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05494657903909683, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053358256816864014, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05298210680484772, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03168339282274246, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027098439633846283, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02697235345840454, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024866560474038124, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024626025930047035, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01630042865872383, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015867331996560097, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01569797657430172, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009770724922418594, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01630042865872383, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01569797657430172, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.29.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2210005819797516, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18978996574878693, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1796489655971527, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1527952402830124, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10304498672485352, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09161185473203659, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1194295734167099, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1095980703830719, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10647278279066086, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08182650059461594, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07535774260759354, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0618315227329731, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.053513720631599426, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.050555888563394547, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.049860041588544846, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03145907446742058, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027764296159148216, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027576155960559845, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023877263069152832, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023432841524481773, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018059227615594864, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01933763548731804, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017196733504533768, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015297267585992813, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018059227615594864, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015297267585992813, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1717320680618286, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1618129462003708, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15863919258117676, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1446349173784256, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08091206103563309, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07732286304235458, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0897238627076149, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08306523412466049, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08197533339262009, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07317446917295456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06970475614070892, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04577464237809181, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.039843831211328506, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.038866184651851654, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03863929212093353, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022870047017931938, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02020074427127838, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020131025463342667, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018655315041542053, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018515169620513916, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012102097272872925, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012499951757490635, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011780858039855957, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008681632578372955, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018515169620513916, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018515169620513916, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23099134862422943, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2176830917596817, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2136249989271164, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1947716772556305, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10858294367790222, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10384975373744965, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12022599577903748, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11133172363042831, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10999567806720734, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09828998893499374, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09352648258209229, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06108536943793297, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05317053943872452, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.051920413970947266, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.051611002534627914, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030489454045891762, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02644871175289154, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026365116238594055, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02432139404118061, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024133432656526566, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01576288975775242, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015554947778582573, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015340641140937805, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009797787293791771, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01576288975775242, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015340641140937805, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.29.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2527429461479187, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21779489517211914, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20130835473537445, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17656482756137848, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1158648431301117, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10028471052646637, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14404991269111633, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13210587203502655, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12124329060316086, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09570349752902985, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09074820578098297, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07373738288879395, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0636468157172203, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05631659924983978, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05446978658437729, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.037213049829006195, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030143652111291885, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029437294229865074, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02629108354449272, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025074714794754982, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.020461326465010643, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020731154829263687, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018170179799199104, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014995074830949306, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018170179799199104, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014995074830949306, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12527893483638763, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11763227730989456, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1146954596042633, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10420796275138855, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.058575332164764404, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.055614668875932693, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06635185331106186, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.061144739389419556, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05939636006951332, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05286923423409462, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05042671784758568, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.033746086061000824, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029243601486086845, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.028088973835110664, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02780243009328842, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016898617148399353, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014489908702671528, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014378936029970646, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013338709250092506, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01316141802817583, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008873353712260723, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008887971751391888, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008488508872687817, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005922275595366955, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016898617148399353, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016898617148399353, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10661531239748001, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1001381054520607, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09729887545108795, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08828747272491455, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.049823883920907974, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04714757576584816, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05682893842458725, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05255003273487091, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05053536593914032, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.044939763844013214, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04283645376563072, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028842488303780556, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025098757818341255, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02387155406177044, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023578539490699768, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014419643208384514, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01228157989680767, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012154461815953255, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011289305984973907, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01110097672790289, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007532484829425812, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007516259327530861, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007146743126213551, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004910666961222887, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014419643208384514, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014419643208384514, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24099504947662354, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22678661346435547, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22206135094165802, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20208583772182465, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.113127201795578, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10790102183818817, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12652185559272766, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11667118221521378, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11462777107954025, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10219370573759079, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09723304212093353, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06429672241210938, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05570012703537941, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05405423417687416, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05366462841629982, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03208499774336815, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027493448927998543, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02735450305044651, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025226585566997528, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024971047416329384, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016516562551259995, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016156859695911407, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015894973650574684, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01003488153219223, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016516562551259995, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015894973650574684, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.30.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20931455492973328, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18446537852287292, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17643392086029053, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14972616732120514, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0975978672504425, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0880281999707222, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1130155697464943, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10315708816051483, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10060174018144608, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08030108362436295, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07154346257448196, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.058371882885694504, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.050189338624477386, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04769664257764816, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04709617421030998, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029568638652563095, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025843307375907898, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025695540010929108, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.022593362256884575, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02221657894551754, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016745300963521004, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017585884779691696, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016012538224458694, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013596705161035061, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016745300963521004, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016012538224458694, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17836754024028778, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16807614266872406, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1648825854063034, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15036611258983612, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08402491360902786, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08032870292663574, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09334982931613922, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08622438460588455, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08512291312217712, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07600843906402588, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07252420485019684, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04762857407331467, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04139012098312378, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04039037600159645, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0401599183678627, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023885954171419144, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021065011620521545, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020998062565922737, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01947495900094509, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01932540535926819, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01283063180744648, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013136067427694798, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012507939711213112, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009240524843335152, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01283063180744648, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01283063180744648, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23312531411647797, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21984300017356873, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21569885313510895, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1967056691646576, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10962121188640594, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10485618561506271, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12161991000175476, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1123742088675499, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1110435426235199, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09923606365919113, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0944729596376419, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06176025792956352, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05371136963367462, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05245205760002136, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.052153848111629486, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030915897339582443, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026786042377352715, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02670007385313511, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024651210755109787, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02446223609149456, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016194265335798264, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015849711373448372, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015788214281201363, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010117651894688606, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016194265335798264, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015788214281201363, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.30.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2494792640209198, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21454070508480072, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19707192480564117, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17273765802383423, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1143329069018364, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09816320240497589, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1435195803642273, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13190993666648865, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11978776007890701, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09436199814081192, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08956026285886765, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07352066040039062, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06349582225084305, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.055508825927972794, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05348094552755356, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03696480393409729, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029570497572422028, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02876245602965355, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025720704346895218, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024370357394218445, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.02008369378745556, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020302796736359596, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017573680728673935, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014408472925424576, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017573680728673935, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017573680728673935, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12215307354927063, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11462962627410889, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11178161203861237, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10153280943632126, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.057060569524765015, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05415493994951248, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06463293731212616, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05962422862648964, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05786159634590149, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05147101357579231, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04905702546238899, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.032818734645843506, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028478754684329033, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027316203340888023, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027033723890781403, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01641557365655899, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014019448310136795, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013911745510995388, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012885131873190403, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012708463706076145, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008579611778259277, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00849878415465355, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00819309800863266, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005523179192095995, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01641557365655899, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01641557365655899, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1041942685842514, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09773419797420502, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09496159851551056, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08625919371843338, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04863162711262703, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04599843919277191, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05553550645709038, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05130236595869064, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04932098090648651, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.043862245976924896, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0418073795735836, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028186790645122528, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024511683732271194, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023301050066947937, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023009881377220154, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014093459583818913, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011981593444943428, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011857149191200733, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0110144829377532, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010827453806996346, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007360944524407387, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00733561534434557, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006978917401283979, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0047887288965284824, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014093459583818913, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014093459583818913, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23877264559268951, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22449198365211487, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21968203783035278, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19981984794139862, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11180420219898224, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1065293624997139, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12528707087039948, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11553750187158585, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11330967396497726, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10095728933811188, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09609468281269073, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0636388286948204, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05513373017311096, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05342506989836693, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.053020477294921875, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03174979239702225, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027182292193174362, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027038952335715294, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024945063516497612, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02467789128422737, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016388898715376854, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016009541228413582, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015757717192173004, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009950022213160992, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016388898715376854, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015757717192173004, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21947096288204193, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1860443651676178, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17458856105804443, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14810672402381897, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10107038170099258, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08827177435159683, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11987956613302231, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1097579374909401, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10572564601898193, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08039878308773041, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07354139536619186, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.061854347586631775, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0533866211771965, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.049418967217206955, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04847019165754318, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031302355229854584, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026914488524198532, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02666371501982212, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023097597062587738, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022476188838481903, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0176639873534441, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01867670565843582, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01650202088057995, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014400433748960495, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0176639873534441, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01650202088057995, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18198707699775696, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.171515554189682, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16821888089179993, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15331855416297913, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08570905029773712, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08194297552108765, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09521540254354477, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0879470556974411, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08682918548583984, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07755886763334274, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0739167332649231, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.048534393310546875, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04219859093427658, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04118039086461067, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04094081372022629, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024322878569364548, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021417442709207535, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021342480555176735, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019785011187195778, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019633222371339798, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01298800203949213, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01326949242502451, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012659482657909393, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00923862773925066, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01298800203949213, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01298800203949213, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2328616976737976, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2194635421037674, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21534790098667145, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19634772837162018, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10938917100429535, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10464970767498016, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12137395143508911, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1121678352355957, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11082036793231964, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09897179901599884, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09436312317848206, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0616384893655777, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05358710139989853, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05232665687799454, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05203423649072647, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030799303203821182, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02675088681280613, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026663171127438545, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02460891753435135, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024425137788057327, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016042400151491165, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015865936875343323, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015620122663676739, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010171733796596527, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016042400151491165, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015620122663676739, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.31.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25411656498908997, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22119513154029846, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20700861513614655, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18176527321338654, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11689800024032593, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10302979499101639, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14253827929496765, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13035082817077637, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12178429216146469, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09748288244009018, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09197071194648743, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07283984124660492, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06291921436786652, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.056800663471221924, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05528108403086662, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03666321560740471, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.0304451622068882, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02987552434206009, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026742517948150635, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025731846690177917, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.020015131682157516, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020769061520695686, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01806267723441124, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015205045230686665, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01806267723441124, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015205045230686665, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12046851217746735, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11301156878471375, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11014571785926819, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09998559206724167, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05625307932496071, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.053350530564785004, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06378383934497833, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05878069996833801, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.057035673409700394, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05071699619293213, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04839358851313591, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03243107721209526, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02809891290962696, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026948105543851852, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026673762127757072, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01622437871992588, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013865413144230843, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013757722452282906, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012745514512062073, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012574727647006512, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00851170439273119, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008452270179986954, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008139816112816334, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005564457271248102, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01622437871992588, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01622437871992588, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10145731270313263, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09512176364660263, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09227311611175537, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08381504565477371, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04731571674346924, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04467012360692024, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.054295897483825684, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05015622824430466, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.047992922365665436, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04265189915895462, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.040685687214136124, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027537835761904716, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023958584293723106, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02268829755485058, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02238050475716591, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013782351277768612, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0116908960044384, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011554580181837082, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010749240405857563, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010555576533079147, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007233997341245413, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007208077237010002, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00683629559352994, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004746774211525917, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013782351277768612, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013782351277768612, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24771510064601898, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23293250799179077, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2280731350183487, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20757320523262024, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11624792963266373, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11081710457801819, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1299785077571869, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1198161244392395, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11778596043586731, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10496726632118225, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0999249815940857, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06606977432966232, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05721212923526764, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.055546823889017105, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.055146925151348114, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03296081721782684, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02821669541299343, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02809133008122444, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025884246453642845, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025626424700021744, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016978852450847626, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016544152051210403, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016352755948901176, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010216320864856243, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016978852450847626, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016352755948901176, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.32.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2063915878534317, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17600098252296448, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.16572420299053192, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1346261203289032, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09578236937522888, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0837884247303009, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11202641576528549, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1025700569152832, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0988258421421051, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07375490665435791, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06538432091474533, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05791114270687103, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.050180867314338684, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.047119349241256714, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04636630043387413, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029511868953704834, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026089433580636978, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025858072564005852, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.022135434672236443, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02166776731610298, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017111441120505333, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018461832776665688, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016223158687353134, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014791435562074184, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017111441120505333, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016223158687353134, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.32.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17811042070388794, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16785065829753876, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16463784873485565, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15015184879302979, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08387386053800583, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08021140843629837, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09305036813020706, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08607760816812515, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08499083667993546, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07591649144887924, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07228162884712219, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.047423794865608215, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0412449911236763, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.040256958454847336, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.040025100111961365, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02372024767100811, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020845767110586166, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020780930295586586, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019240185618400574, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019095834344625473, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012548360973596573, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012783297337591648, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012226952239871025, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00874431524425745, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012548360973596573, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012548360973596573, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.32.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22879242897033691, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21565625071525574, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2116439789533615, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19306127727031708, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10750038176774979, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10284681618213654, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1191282570362091, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11019422858953476, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10890352725982666, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09733925759792328, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09268438071012497, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.060583215206861496, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.052628424018621445, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05140164494514465, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05111359432339668, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030219784006476402, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026188062503933907, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026102639734745026, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024084797129034996, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02389652654528618, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015680182725191116, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01539910864084959, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015274743549525738, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009702642448246479, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015680182725191116, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015274743549525738, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.32.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24796652793884277, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21507525444030762, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20067040622234344, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17645446956157684, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11387348175048828, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09973391890525818, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13932199776172638, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12753605842590332, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11874966323375702, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09461978822946548, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08954858034849167, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07119432836771011, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06146547198295593, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05525267869234085, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05370136722922325, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03588000312447548, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029474899172782898, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028903735801577568, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025776131078600883, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.0247497521340847, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019648374989628792, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019989749416708946, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01766984723508358, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014445280656218529, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01766984723508358, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01766984723508358, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12415807694196701, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11673346906900406, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11396768689155579, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10370458662509918, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05809134989976883, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05524018406867981, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06538096815347672, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.060519903898239136, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05888630077242851, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05254115164279938, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04999774694442749, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03319207578897476, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02887858636677265, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027795778587460518, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027533207088708878, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016590725630521774, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01421346515417099, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01411051768809557, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013067844323813915, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012903407216072083, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00864062737673521, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008511601947247982, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008282486349344254, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005428083706647158, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016590725630521774, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016590725630521774, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10829563438892365, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10178770124912262, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09918534755706787, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0903099775314331, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0506824292242527, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04810652136802673, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05750627815723419, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05303914099931717, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05138452351093292, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04579416289925575, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04370169714093208, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029168762266635895, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025311557576060295, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024252261966466904, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024000225588679314, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014572558924555779, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01240528654307127, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012300228700041771, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0114049781113863, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011242429725825787, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007572925183922052, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007459540385752916, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007222700398415327, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004757029470056295, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014572558924555779, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014572558924555779, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24556340277194977, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23142363131046295, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22674433887004852, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20673128962516785, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11531667411327362, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11011659353971481, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12861238420009613, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11866036802530289, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11685565114021301, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10432542115449905, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09938335418701172, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06528115272521973, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.056641750037670135, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0551009401679039, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.054742034524679184, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03257904201745987, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02799213118851185, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027885232120752335, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025705691426992416, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02547883801162243, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016775788739323616, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01636761799454689, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0162203386425972, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01009877398610115, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016775788739323616, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0162203386425972, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.33.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21726392209529877, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17494171857833862, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15896949172019958, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12529730796813965, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.098270945250988, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08158649504184723, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12108611315488815, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11094219237565994, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10458071529865265, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07201556861400604, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06574863195419312, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06258322298526764, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05375572666525841, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04797695577144623, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04652220383286476, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03153269737958908, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026015961542725563, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025602780282497406, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021176280453801155, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020176397636532784, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017574548721313477, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018201502040028572, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015806253999471664, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01365696731954813, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017574548721313477, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015806253999471664, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.33.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1883627474308014, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1776038110256195, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1742524355649948, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15898950397968292, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08874305337667465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08490532636642456, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09843741357326508, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09100577235221863, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0898745134472847, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08035130798816681, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07658794522285461, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05017293617129326, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.043604589998722076, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04258030280470848, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.042326126247644424, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02507968060672283, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02202002890408039, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021947618573904037, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02032637782394886, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020176945254206657, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013241918757557869, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013451328501105309, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012904828414320946, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0091552110388875, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013241918757557869, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013241918757557869, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.33.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23504231870174408, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22174780070781708, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21761362254619598, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19867873191833496, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1104709804058075, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1057441309094429, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12265609949827194, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11321528255939484, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11190462857484818, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10007679462432861, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09545855969190598, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06232280284166336, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05408390983939171, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.052834365516901016, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0525367297232151, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03116803802549839, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02695656567811966, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026872403919696808, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02480664849281311, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02461916022002697, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016304653137922287, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01589968428015709, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015896666795015335, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01009836420416832, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016304653137922287, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015896666795015335, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.33.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2511563301086426, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2173772156238556, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20203356444835663, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1776636242866516, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11524573713541031, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10050537437200546, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14176592230796814, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13023263216018677, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12039193511009216, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09585420787334442, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09059002995491028, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07241543382406235, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06276979297399521, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05596572160720825, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05425266548991203, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.036416973918676376, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029858089983463287, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029212601482868195, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026130590587854385, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024991044774651527, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01969718001782894, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.02034730650484562, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01752541959285736, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014630747959017754, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01752541959285736, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01752541959285736, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12198309600353241, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11455556750297546, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11170019209384918, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10157492756843567, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05705379694700241, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05414855107665062, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06444628536701202, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.059568893164396286, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05784475430846214, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05147980526089668, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04902582988142967, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03272801637649536, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028461264446377754, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02731095440685749, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027034204453229904, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016362521797418594, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01399361900985241, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013882400467991829, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012857157737016678, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01267833448946476, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008544459007680416, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00844777561724186, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008161505684256554, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005451128352433443, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016362521797418594, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016362521797418594, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10363322496414185, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09734410792589188, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09462925046682358, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08610333502292633, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.048485271632671356, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04588058590888977, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05539485067129135, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0511082224547863, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04917546361684799, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.043767768889665604, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.041782576590776443, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028111472725868225, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024410877376794815, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023229919373989105, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022945795208215714, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014061865396797657, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011908936314284801, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011788041330873966, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010948245413601398, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010766955092549324, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0073373569175601006, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007237625773996115, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006954539567232132, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004659629426896572, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014061865396797657, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014061865396797657, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24459323287010193, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23011082410812378, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22535580396652222, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20531034469604492, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11476686596870422, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1094745323061943, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12812259793281555, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11821073293685913, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1163022443652153, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10364549607038498, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09873045980930328, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06508311629295349, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.056414078921079636, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05483647808432579, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.054461490362882614, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03246808052062988, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027859166264533997, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02773807756602764, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02554939314723015, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025316324084997177, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016715222969651222, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01631358452141285, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01612347736954689, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01007327064871788, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016715222969651222, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01612347736954689, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.34.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20250071585178375, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16303317248821259, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14644157886505127, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1176978126168251, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09361381828784943, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07694119215011597, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11477843672037125, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10541661828756332, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09702464938163757, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06629638373851776, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06218792125582695, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05922936275601387, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.051036134362220764, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.045732077211141586, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.044414740055799484, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029846208170056343, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024766376242041588, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024236975237727165, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.019739650189876556, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.018821487203240395, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016564801335334778, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017264176160097122, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01494967844337225, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01295531541109085, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016564801335334778, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01494967844337225, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.34.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19478514790534973, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1836455911397934, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18022510409355164, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16451281309127808, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0917426124215126, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08779492229223251, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1017322763800621, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09407120198011398, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09293392300605774, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08311580866575241, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07921966910362244, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.051833029836416245, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04506879299879074, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04400821775197983, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04376153647899628, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025905590504407883, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022746775299310684, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022680096328258514, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02100420743227005, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020846551284193993, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0136627908796072, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013885335996747017, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013315457850694656, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009426954202353954, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0136627908796072, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0136627908796072, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.34.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2371436357498169, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22367435693740845, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21958306431770325, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20039933919906616, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11145783960819244, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10669135302305222, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12349691987037659, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11427449434995651, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11291705071926117, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10100380331277847, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09620614349842072, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06276039034128189, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0545504130423069, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05328815057873726, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05299082398414612, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031321361660957336, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027132539078593254, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027043871581554413, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024965139105916023, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024775976315140724, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016227709129452705, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.0159223023802042, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015811190009117126, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009981329552829266, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016227709129452705, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015811190009117126, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.34.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2518185079097748, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21841025352478027, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20286951959133148, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17835302650928497, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11557798087596893, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10085119307041168, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14307767152786255, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13099443912506104, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1207338273525238, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0963166132569313, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09115035831928253, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07304910570383072, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0631108507514, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05615547299385071, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.054401300847530365, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.037176601588726044, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03000583127140999, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029346073046326637, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026320967823266983, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025169262662529945, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.020946264266967773, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020525338128209114, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01888025365769863, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014826913364231586, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014826913364231586, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014826913364231586, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12399154156446457, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11650795489549637, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11358800530433655, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10343524813652039, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.058040447533130646, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.055085670202970505, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06584150344133377, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06066298112273216, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05884271860122681, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.052418772131204605, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05003145709633827, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03344601392745972, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029007554054260254, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027809372171759605, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02751932106912136, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016756823286414146, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014279385097324848, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014159276150166988, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013129685074090958, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012949547730386257, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0087750144302845, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00865907222032547, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00837340485304594, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005626799538731575, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016756823286414146, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016756823286414146, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10718560963869095, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10071316361427307, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09802746772766113, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08925066143274307, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05019291117787361, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.047573935240507126, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05709722638130188, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05270041525363922, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.050900962203741074, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04531819745898247, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04325332120060921, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028960198163986206, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02516641467809677, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02403062954545021, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023764697834849358, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014477796852588654, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012297328561544418, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012185441330075264, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01130344346165657, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01112819742411375, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0075221918523311615, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007422690279781818, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0071501294150948524, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004737568087875843, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014477796852588654, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014477796852588654, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24049828946590424, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2263348400592804, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22176767885684967, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.2020263373851776, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11289486289024353, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10776147246360779, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12606023252010345, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11628246307373047, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11441797763109207, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1020171269774437, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09712470322847366, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06399556994438171, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055498573929071426, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0539340004324913, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.053566936403512955, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03193683549761772, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027396176010370255, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027273835614323616, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025143155828118324, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0249088816344738, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01643463410437107, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016024531796574593, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015855662524700165, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009860007092356682, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01643463410437107, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015855662524700165, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.35.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20362620055675507, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16346503794193268, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15064087510108948, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12807150185108185, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09133733063936234, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07580602169036865, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11028143018484116, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.099228136241436, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0963655635714531, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0694221556186676, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06260327249765396, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05672518536448479, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04877684265375137, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.045243456959724426, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04434274882078171, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029765348881483078, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025527451187372208, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02534995973110199, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02171190083026886, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021177323535084724, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018224401399493217, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018585603684186935, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017236458137631416, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015173349529504776, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018224401399493217, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015173349529504776, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20176281034946442, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1902865171432495, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18669825792312622, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1703951507806778, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09511765092611313, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09099633991718292, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10550148040056229, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09754493832588196, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09636346995830536, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08613461256027222, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08207594603300095, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05379652976989746, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.046733614057302475, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.045629777014255524, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04536476358771324, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02690684236586094, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.023567168042063713, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023488612845540047, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02174612134695053, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021577006205916405, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014217959716916084, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014342427253723145, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.0138557692989707, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009687204845249653, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014217959716916084, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014217959716916084, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.35.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24173443019390106, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22801490128040314, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22382961213588715, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20430780947208405, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11371201276779175, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1088360846042633, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1262018382549286, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11657006293535233, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11519385129213333, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10301346331834793, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09825316071510315, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06431740522384644, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05572596937417984, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05443758890032768, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05412881448864937, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0321645587682724, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02784694917500019, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0277582835406065, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0256433617323637, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025449218228459358, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016911370679736137, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01653379760682583, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016491392627358437, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010648718103766441, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016911370679736137, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016491392627358437, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.35.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25726088881492615, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22404168546199799, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20888438820838928, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18384411931037903, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11808782070875168, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.1035512313246727, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1444011777639389, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13311579823493958, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12322696298360825, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09884857386350632, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09332893043756485, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07374819368124008, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06388675421476364, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05704621970653534, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05533468723297119, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.037014663219451904, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029886646196246147, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029236948117613792, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026077231392264366, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02492515742778778, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019821014255285263, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019704297184944153, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017643945291638374, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013399465009570122, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017643945291638374, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017643945291638374, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1266956925392151, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11900583654642105, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11607405543327332, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10559812188148499, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.059276677668094635, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05627630650997162, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06704453378915787, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06193144991993904, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.060117028653621674, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0535118542611599, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.051029957830905914, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.034033890813589096, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029580095782876015, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02839667722582817, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02811248041689396, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01700708456337452, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014555584639310837, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01444398332387209, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01337357610464096, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013193746097385883, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008879845030605793, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008786140009760857, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008488212712109089, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005665066186338663, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01700708456337452, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01700708456337452, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10837569832801819, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10187099874019623, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09911864250898361, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09021832793951035, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0507664792239666, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.048084408044815063, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.057853296399116516, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.053359758108854294, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05147623270750046, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.045845724642276764, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04379117488861084, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029361706227064133, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025495149195194244, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024309903383255005, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02402767539024353, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014676354825496674, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012441121973097324, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012321531772613525, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011432841420173645, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011255796067416668, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007627754006534815, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007516011130064726, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0072443136014044285, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004792992956936359, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014676354825496674, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014676354825496674, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24496309459209442, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23049333691596985, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2258300632238388, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20577383041381836, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11496706306934357, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10972616076469421, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12815698981285095, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1183798611164093, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11653591692447662, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10387983918190002, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09891311824321747, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06510671228170395, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0564810186624527, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05492227524518967, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.054541442543268204, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0324808768928051, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027885369956493378, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027767231687903404, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025578726083040237, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02533877268433571, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016686800867319107, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01629655621945858, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016110708937048912, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010016920045018196, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016686800867319107, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016110708937048912, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.36.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2165294736623764, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16946488618850708, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1514478623867035, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.123992919921875, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09676533937454224, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07507963478565216, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1202162578701973, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11060287058353424, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10405199229717255, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07151669263839722, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06437927484512329, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06179997697472572, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05328161641955376, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04697839543223381, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04537346214056015, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031061071902513504, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024958893656730652, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024517722427845, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02014758251607418, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019026849418878555, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016845935955643654, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016997555270791054, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014903532341122627, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012039844878017902, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016845935955643654, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016845935955643654, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.36.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20298005640506744, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19142422080039978, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18783171474933624, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1714593470096588, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09572243690490723, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09156468510627747, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10613588243722916, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09814666211605072, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09698311239480972, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08668331056833267, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08270286023616791, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.054117538034915924, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.047009386122226715, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.045909129083156586, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.045647162944078445, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02705809473991394, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.023703305050730705, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023632684722542763, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02187512256205082, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02171192690730095, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014273369684815407, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014415164478123188, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013914968818426132, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00972554087638855, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014273369684815407, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014273369684815407, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.36.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24383246898651123, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22998149693012238, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22575846314430237, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2061588615179062, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11470591276884079, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1097935363650322, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1271088570356369, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11759073287248611, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11618753522634506, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10393453389406204, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0991341769695282, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06465675681829453, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05619652569293976, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05490517616271973, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05459010973572731, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.032290730625391006, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02805546671152115, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027966056019067764, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02582782506942749, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025633053854107857, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016824526712298393, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01660887524485588, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01639922522008419, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010632861405611038, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016824526712298393, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01639922522008419, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.36.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2556096017360687, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22306601703166962, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20741243660449982, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1828872561454773, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1172468438744545, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10266608744859695, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1447911411523819, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13333624601364136, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12230683118104935, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0985485315322876, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09326131641864777, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07384561747312546, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0639243945479393, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0566292405128479, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05479208379983902, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.036976296454668045, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029664698988199234, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028934497386217117, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025968359783291817, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024740785360336304, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019659623503684998, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019633648917078972, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017300060018897057, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013284807093441486, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017300060018897057, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017300060018897057, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12595245242118835, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11836361885070801, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11542434245347977, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10508843511343002, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05903217941522598, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.055995307862758636, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06699799746274948, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.061692412942647934, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05985008925199509, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05328720808029175, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05094455927610397, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.034014374017715454, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029476802796125412, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.028257356956601143, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027965417131781578, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01701725274324417, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014471902512013912, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01435261033475399, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013298642821609974, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013113334774971008, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008862155489623547, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008718829602003098, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00845671072602272, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005589191801846027, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01701725274324417, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01701725274324417, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10974488407373428, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1031917855143547, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10044483840465546, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09153491258621216, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0514235757291317, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04869190976023674, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.058812759816646576, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.054030124098062515, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05213063955307007, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04643573239445686, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0444762222468853, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029830502346158028, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025807516649365425, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02463637664914131, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024351637810468674, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014928349293768406, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01261721272021532, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012499328702688217, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011598317883908749, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011421535164117813, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007792301010340452, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0076314168982207775, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00741871353238821, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004886158276349306, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014928349293768406, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014928349293768406, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24603869020938873, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23160269856452942, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22694118320941925, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20680591464042664, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1156269833445549, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.110352523624897, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1289963275194168, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11902691423892975, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11718571931123734, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10445254296064377, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09957649558782578, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06562879681587219, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05685775354504585, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05529356747865677, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05492950603365898, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032744843512773514, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028166528791189194, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028038477525115013, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025859225541353226, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02561969682574272, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01690201833844185, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016607020050287247, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016318410634994507, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010418849065899849, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01690201833844185, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016318410634994507, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.37.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2010870724916458, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16640092432498932, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15415287017822266, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12113869190216064, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09195200353860855, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07900463044643402, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11073478311300278, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10115421563386917, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0967172235250473, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06880158185958862, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.060686204582452774, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05703454092144966, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04915310814976692, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04499371349811554, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04398862272500992, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028718965128064156, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024516437202692032, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024246130138635635, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020275788381695747, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019599825143814087, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016023831441998482, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017118297517299652, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01473782304674387, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013164730742573738, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016023831441998482, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016023831441998482, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2050434648990631, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19334159791469574, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18976429104804993, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.17314791679382324, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09671039134263992, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09249278903007507, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10715766996145248, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0991838350892067, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09798085689544678, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08754967153072357, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08337168395519257, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.054609742015600204, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04748648777604103, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04636211693286896, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.046095799654722214, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.027293771505355835, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02388325333595276, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023807154968380928, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0220197644084692, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021856052801012993, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014338867738842964, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014454705640673637, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013975949957966805, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009662661701440811, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014338867738842964, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014338867738842964, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.37.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24437972903251648, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2304867058992386, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22616827487945557, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2064422219991684, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11494797468185425, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10999146848917007, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.127375990152359, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11786776036024094, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1164964810013771, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10409460216760635, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0992269366979599, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06473202258348465, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05628453195095062, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05497431755065918, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.054660435765981674, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03232632204890251, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027987252920866013, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02790124900639057, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025748278945684433, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02554483152925968, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016788367182016373, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01642768643796444, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016354728490114212, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010304922237992287, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016788367182016373, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016354728490114212, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.37.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2542118728160858, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22230900824069977, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2069443315267563, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18259039521217346, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11689455807209015, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10251868516206741, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1446584314107895, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1327826976776123, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1219187006354332, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09854075312614441, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0934111624956131, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07406202703714371, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0639725849032402, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05667075142264366, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05484703183174133, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03746989369392395, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030058568343520164, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029342368245124817, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026483898982405663, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02528654783964157, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.020469525828957558, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020316313952207565, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0181606262922287, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014321643859148026, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0181606262922287, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0181606262922287, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13676539063453674, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12859304249286652, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12553860247135162, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11427447944879532, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06416565179824829, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0609513595700264, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07216443121433258, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0668252557516098, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06504646688699722, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05787648260593414, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05512154847383499, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03659204766154289, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.031890083104372025, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.030691510066390038, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.030402302742004395, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018282640725374222, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015661805868148804, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015549304895102978, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014371161349117756, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01418343000113964, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009466098621487617, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009332702495157719, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.009066322818398476, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005876061040908098, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018282640725374222, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015549304895102978, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11711252480745316, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11011122167110443, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10729426890611649, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09774170070886612, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05497578904032707, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05215524509549141, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06230833753943443, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05748428776860237, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05572781711816788, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.049623310565948486, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04741022735834122, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03164403885602951, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027449453249573708, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026318639516830444, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026047218590974808, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01581425592303276, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013441921211779118, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01333207730203867, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01234017126262188, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012173125520348549, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008210159838199615, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00805144663900137, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007836943492293358, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005095107946544886, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01581425592303276, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01581425592303276, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.25018131732940674, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23554228246212006, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23089784383773804, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.21030381321907043, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11759991198778152, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1122860237956047, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13093966245651245, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12091310322284698, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11919405311346054, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1062362864613533, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10118471086025238, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06658109277486801, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.057727258652448654, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.056203339248895645, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05584143102169037, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03321446478366852, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028549781069159508, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02844717726111412, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.026189174503087997, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025953665375709534, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017110949382185936, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016697995364665985, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016540072858333588, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01033477857708931, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017110949382185936, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016540072858333588, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.38.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1933600902557373, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15089191496372223, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13362033665180206, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10563089698553085, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08747783303260803, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07009825855493546, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10856515169143677, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.100028395652771, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09266218543052673, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.060389768332242966, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05731794610619545, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05574953928589821, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04808955639600754, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.042377956211566925, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04093688353896141, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02799886465072632, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022281818091869354, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.021796001121401787, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017063699662685394, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015991898253560066, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015051768161356449, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014920946210622787, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01326252892613411, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010242555290460587, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017063699662685394, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014920946210622787, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20538128912448883, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19355513155460358, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18986335396766663, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1732495129108429, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09690843522548676, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09262517094612122, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10755663365125656, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09939457476139069, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0981629341840744, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08765488862991333, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08348987251520157, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.054818153381347656, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04759478569030762, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04645411670207977, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.046181272715330124, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02740596979856491, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.023913022130727768, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02383720688521862, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022029725834727287, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021862411871552467, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014449529349803925, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014442948624491692, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014079914428293705, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009611763060092926, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014449529349803925, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014449529349803925, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.38.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24531294405460358, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23126661777496338, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22696128487586975, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20703382790088654, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11545678973197937, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11038929224014282, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12790988385677338, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.118353471159935, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11695166677236557, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10445910692214966, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0994812399148941, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06505083292722702, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0565241314470768, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.055191364139318466, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05487588047981262, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03246486932039261, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.028089923784136772, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027997354045510292, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025811070576310158, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02561067044734955, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01682743988931179, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016468804329633713, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016387932002544403, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010292037390172482, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01682743988931179, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016387932002544403, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.38.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25527718663215637, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22360581159591675, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20835809409618378, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1840391755104065, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11725588142871857, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.1030968725681305, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14418655633926392, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13302583992481232, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12218476086854935, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09893873333930969, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09364216029644012, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07347465306520462, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06375105679035187, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0565682016313076, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05476215109229088, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.036828331649303436, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02948245406150818, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028760572895407677, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02582658641040325, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02461167611181736, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019490769132971764, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01930857077240944, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01716686226427555, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012821600772440434, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01716686226427555, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01716686226427555, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1339322179555893, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1258053034543991, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12264559417963028, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11160784214735031, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06280840188264847, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.059593722224235535, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07103416323661804, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06562284380197525, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06370408087968826, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05662718042731285, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.053956788033246994, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03606949374079704, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.031369104981422424, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.030079824849963188, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02977280132472515, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018045425415039062, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015399334020912647, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015277490951120853, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014127987436950207, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013933159410953522, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00942183192819357, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009275125339627266, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008998958393931389, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005938700865954161, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018045425415039062, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015277490951120853, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1144842803478241, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10750431567430496, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10466785728931427, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09523573517799377, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.053685519844293594, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05087076127529144, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06106536090373993, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05632233992218971, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05445486307144165, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04842695966362953, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.046193286776542664, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03100019134581089, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026901263743638992, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0257106963545084, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025416862219572067, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015495790168642998, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013147865422070026, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01302774716168642, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012062599882483482, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011884849518537521, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008052733726799488, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007912129163742065, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007654018700122833, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0050272089429199696, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015495790168642998, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015495790168642998, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2459660917520523, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2312663197517395, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22649452090263367, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.2062472105026245, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11550042778253555, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11015690863132477, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12888793647289276, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11897364258766174, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11711905151605606, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1041969358921051, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09922394156455994, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06551190465688705, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0567699559032917, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05519712716341019, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05482589453458786, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03266824781894684, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028004735708236694, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027881499379873276, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025658080354332924, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025410085916519165, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016782546415925026, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01634056121110916, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016195567324757576, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010003638453781605, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016782546415925026, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016195567324757576, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.39.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21660631895065308, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15874242782592773, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13530109822750092, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10608969628810883, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09301940351724625, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06685268878936768, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12216315418481827, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11087850481271744, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10282943397760391, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06343013793230057, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06125007942318916, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.062258463352918625, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05376352369785309, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04572582244873047, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0436415895819664, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03163234889507294, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02508782409131527, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024567650631070137, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.019787611439824104, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01833561807870865, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01772696152329445, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018245283514261246, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015317915007472038, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013712434098124504, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01833561807870865, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015317915007472038, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.39.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20710648596286774, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19518280029296875, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19144198298454285, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.17461112141609192, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09775693714618683, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09344352781772614, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10850498825311661, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10028213262557983, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09904984384775162, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08839345723390579, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08421020209789276, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05532987788319588, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.048053428530693054, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04689544811844826, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04662526398897171, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02772844396531582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02419189177453518, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024112530052661896, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02229381538927555, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022119905799627304, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014729389920830727, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014686962589621544, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01435514260083437, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009866364300251007, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014729389920830727, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014729389920830727, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.39.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24802404642105103, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23373088240623474, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22932547330856323, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20925050973892212, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11681092530488968, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11163872480392456, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12973693013191223, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11977493017911911, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11833404004573822, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10560518503189087, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10060891509056091, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06593003123998642, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05725979804992676, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.055898409336805344, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.055577415972948074, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03303071856498718, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02856621891260147, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028473466634750366, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.026261163875460625, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.026056963950395584, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01738029532134533, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01691066101193428, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01693669892847538, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010811776854097843, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01738029532134533, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01691066101193428, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.39.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2583252191543579, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22751161456108093, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21220631897449493, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18762458860874176, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11917129158973694, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10504060983657837, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14668360352516174, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13529172539710999, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12394595146179199, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10109502077102661, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09579622745513916, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07492755353450775, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06522024422883987, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05780447646975517, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05592424049973488, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.037710607051849365, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03065624088048935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.0299055278301239, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02713766135275364, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02591947466135025, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.02014697901904583, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020729202777147293, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017730355262756348, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01462273858487606, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017730355262756348, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017730355262756348, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.40.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1386861652135849, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.13030511140823364, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12722861766815186, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11579600721597672, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0651627853512764, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.061918098479509354, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07327694445848465, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06780035048723221, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.066074900329113, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.058784157037734985, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.055917419493198395, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.037169817835092545, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03238622844219208, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.031171293929219246, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.030878223478794098, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018560899421572685, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015887673944234848, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015778541564941406, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01456877775490284, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014382997527718544, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009591863490641117, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009444967843592167, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.009185640141367912, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00591676589101553, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018560899421572685, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015778541564941406, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.40.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11942431330680847, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11226053535938263, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10946476459503174, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09966550022363663, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05608504265546799, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05325927957892418, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06348051130771637, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05855463445186615, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0568743571639061, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05062956362962723, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0482584647834301, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.032219719141721725, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02797034941613674, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026845313608646393, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026583148166537285, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016105664893984795, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013705998659133911, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013599657453596592, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012576533481478691, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012406721711158752, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008344720117747784, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008185483515262604, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007975146174430847, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005155926104635, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016105664893984795, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016105664893984795, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.40.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2504676282405853, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23557181656360626, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23077259957790375, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.2102602869272232, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11779189109802246, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11239256709814072, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13114705681800842, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12115561217069626, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11936800181865692, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10634268820285797, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10115812718868256, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06664320081472397, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.057820603251457214, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05627267062664032, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05590515583753586, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03324567899107933, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028541309759020805, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028427908197045326, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02615586295723915, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02591697685420513, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017068898305296898, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01662307418882847, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016498683020472527, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010155477561056614, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017068898305296898, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016498683020472527, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.40.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2140706330537796, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15481328964233398, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1285816878080368, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10324588418006897, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08975061774253845, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06800027191638947, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1233905553817749, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1134142354130745, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10299237817525864, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06445105373859406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0625152438879013, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06359192728996277, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05475585162639618, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04399466887116432, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04107850044965744, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03203396871685982, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02389284037053585, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023149453103542328, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.019262131303548813, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0172268059104681, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01762165129184723, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017663709819316864, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01435931771993637, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012630345299839973, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0172268059104681, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0172268059104681, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.40.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2071676105260849, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19519367814064026, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19143782556056976, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1745845228433609, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09777753800153732, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09343185275793076, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10862938314676285, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10033582895994186, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09909138828516006, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08838525414466858, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08422033488750458, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05544313043355942, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0480707548558712, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04690825566649437, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04663064330816269, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.027744360268115997, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02420894429087639, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02413131482899189, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02230052649974823, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022129682824015617, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014692684635519981, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014717631973326206, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01431647501885891, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00991186685860157, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014692684635519981, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014692684635519981, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.40.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24761943519115448, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23334184288978577, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22900161147117615, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20869080722332, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11664120107889175, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1114887073636055, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12934421002864838, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11964932084083557, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1182028278708458, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10541774332523346, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10037460178136826, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06580715626478195, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05715782195329666, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.055784840136766434, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05546176806092262, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03283555805683136, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.028415903449058533, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028323693200945854, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02609620802104473, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02588815614581108, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017050739377737045, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01670084334909916, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01659664697945118, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010488075204193592, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017050739377737045, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01659664697945118, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.40.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25754308700561523, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22735156118869781, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21167156100273132, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1869834065437317, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11877943575382233, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10466749221086502, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14803987741470337, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13570544123649597, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12339794635772705, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.1009739488363266, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09582836180925369, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07600799202919006, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06523564457893372, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05749296396970749, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.055537495762109756, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03860969841480255, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030284333974123, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029459740966558456, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02678406611084938, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02549310401082039, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.021379049867391586, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.02030518278479576, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01892656460404396, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013964930549263954, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013964930549263954, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013964930549263954, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.41.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.14244355261325836, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.13394460082054138, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13075530529022217, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11896573752164841, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06695293635129929, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06360495090484619, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07551908493041992, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06967952102422714, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06789787113666534, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06037905439734459, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.057475652545690536, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03835337609052658, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03330279886722565, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03204713016748428, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03174843639135361, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01917884312570095, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.016357051208615303, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.016243640333414078, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014999380335211754, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014811438508331776, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009981046430766582, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009749776683747768, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.009550977498292923, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006149951368570328, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.016357051208615303, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014999380335211754, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.41.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12261998653411865, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11522854119539261, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11235364526510239, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10228807479143143, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0576494000852108, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05468956008553505, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06534257531166077, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06019776687026024, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05844594165682793, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05198737978935242, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0496489554643631, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0332002229988575, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02877413108944893, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027607334777712822, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027330657467246056, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01660330966114998, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014110270887613297, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01399675291031599, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01294667273759842, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012772036716341972, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008634334430098534, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008451721630990505, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008253565058112144, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005366023629903793, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01660330966114998, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01660330966114998, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.41.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.25301462411880493, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23803769052028656, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23329493403434753, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.21238042414188385, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11896316707134247, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1134936660528183, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13247589766979218, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1223621740937233, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.12057487666606903, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10735299438238144, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10213540494441986, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06737232208251953, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05840057134628296, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0568636879324913, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05650228634476662, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.033587224781513214, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028871389105916023, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028752803802490234, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.026447951793670654, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02621283195912838, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01727285422384739, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01685202494263649, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016700077801942825, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010360049083828926, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01727285422384739, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016700077801942825, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.41.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23745205998420715, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17628666758537292, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14962662756443024, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11996574699878693, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10462766885757446, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07928910851478577, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13661274313926697, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12549881637096405, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11456457525491714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06912819296121597, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06972700357437134, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.07035727798938751, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.06048184260725975, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05105644837021828, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.048595234751701355, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03527214005589485, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027469797059893608, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02672429010272026, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02094438299536705, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019098876044154167, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.019142352044582367, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019522899761795998, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016196109354496002, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013970916159451008, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016196109354496002, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016196109354496002, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.41.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20217204093933105, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19040018320083618, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18674618005752563, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1702587902545929, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09546669572591782, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09119730442762375, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10598134994506836, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09799470752477646, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09674618393182755, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08623560518026352, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0821027085185051, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05408114939928055, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.046946536749601364, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04579635709524155, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.045519694685935974, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02702748030424118, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02360595390200615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023525420576334, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.021729471161961555, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021559229120612144, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014239178970456123, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.0143205551430583, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01385981123894453, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009588883258402348, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014239178970456123, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014239178970456123, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.41.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2476148158311844, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23320360481739044, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22873608767986298, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20856797695159912, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11669334769248962, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1114862933754921, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1294909119606018, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11972499638795853, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11825498938560486, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10540349036455154, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.1003052294254303, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06597529351711273, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05723857507109642, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.055852219462394714, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05552215874195099, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03293120861053467, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02853744477033615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02844339609146118, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0262158066034317, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02600826323032379, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017197607085108757, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01690927892923355, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01673820987343788, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010812594555318356, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017197607085108757, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01673820987343788, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.41.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2578716278076172, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22758761048316956, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2124481052160263, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18799282610416412, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11892624944448471, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.1050625815987587, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14633524417877197, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13516180217266083, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12365200370550156, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10119941830635071, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09575007110834122, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07462374866008759, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06495014578104019, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05757376551628113, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.055719662457704544, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.037507057189941406, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030340518802404404, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02958429418504238, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026853883638978004, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025631070137023926, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.020043600350618362, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.02028522454202175, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017713097855448723, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014016926288604736, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017713097855448723, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017713097855448723, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.42.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1382301300764084, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12981769442558289, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12661930918693542, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11516121029853821, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06496627628803253, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.061645034700632095, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07328563183546066, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06778363138437271, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06590033322572708, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.058523815125226974, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05565720796585083, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03714461997151375, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.032375458627939224, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.031103409826755524, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03080688789486885, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018578577786684036, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015880322083830833, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01576649770140648, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014547389931976795, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014358220621943474, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009643363766372204, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009483831003308296, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.009226588532328606, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005994259845465422, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018578577786684036, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01576649770140648, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.42.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11794009804725647, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1107712909579277, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10794815421104431, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09811924397945404, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.055413175374269485, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05254954844713211, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06281234323978424, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05796306952834129, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05618986859917641, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.049936529248952866, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04756654426455498, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03189446032047272, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0276982132345438, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02654477395117283, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026267563924193382, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015948450192809105, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013578462414443493, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013470699079334736, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012449633330106735, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012276813387870789, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00830290000885725, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008164000697433949, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007928737439215183, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005204260349273682, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015948450192809105, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015948450192809105, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.42.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.25387808680534363, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23867827653884888, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23384101688861847, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.21272632479667664, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11945441365242004, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1138896644115448, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13308636844158173, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12289809435606003, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.12108956277370453, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1076900064945221, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10244321823120117, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06761302053928375, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.058643389493227005, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05706913396716118, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05669834837317467, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03373098373413086, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028958596289157867, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028844507411122322, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.026507964357733727, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.026271585375070572, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01731981709599495, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016871623694896698, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01673140563070774, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010319359600543976, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01731981709599495, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01673140563070774, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.42.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20074136555194855, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1502537578344345, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12730248272418976, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10912209004163742, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09112221747636795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06952226907014847, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11691798269748688, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10718555748462677, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09681478887796402, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06353689730167389, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.060763999819755554, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06049905717372894, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.051666297018527985, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04430752620100975, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04242955148220062, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030230451375246048, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02357321046292782, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022862812504172325, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018347665667533875, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01696363091468811, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016205135732889175, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016316132619976997, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013780411332845688, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011423827148973942, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018347665667533875, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016205135732889175, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.42.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20019519329071045, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18836140632629395, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18469814956188202, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1682300865650177, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09454222768545151, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09025932103395462, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10491832345724106, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09705793857574463, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0958067774772644, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08532840758562088, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08112627267837524, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05350347235798836, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04651591181755066, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.045367397367954254, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.045095670968294144, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.026775794103741646, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.023425472900271416, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0233475212007761, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.021554628387093544, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021381758153438568, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014140235260128975, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014251824468374252, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01376677118241787, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009607872925698757, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014140235260128975, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014140235260128975, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.42.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24709896743297577, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23264175653457642, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2281595766544342, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20783723890781403, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11642155796289444, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11118799448013306, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12907461822032928, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11946412920951843, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11799099296331406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10512325912714005, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09993498027324677, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06571049988269806, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05707775801420212, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.055694516748189926, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05536110699176788, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0327816866338253, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02836107462644577, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028267372399568558, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02601861022412777, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025807270780205727, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01699652709066868, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016662225127220154, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016535582020878792, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010450053960084915, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01699652709066868, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016535582020878792, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.42.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25916892290115356, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22874228656291962, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2129473239183426, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18837206065654755, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11967695504426956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10544582456350327, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1506360024213791, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13683676719665527, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1245465949177742, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.101918064057827, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09679857641458511, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07650187611579895, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06602809578180313, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.058214474469423294, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05625051259994507, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.0388604961335659, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.031171603128314018, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.030369199812412262, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.0277607012540102, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026490094140172005, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.021611489355564117, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.02145780436694622, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.019181735813617706, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015494255349040031, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015494255349040031, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.015494255349040031, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.43.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.14833201467990875, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.13881570100784302, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13518132269382477, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12241169810295105, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06966345012187958, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06590662896633148, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07904967665672302, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07282040268182755, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.07071301341056824, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06247127428650856, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05949018523097038, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04026036337018013, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03488166630268097, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03342800587415695, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03308755159378052, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.020155971869826317, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.017225392162799835, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.017086409032344818, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01576058194041252, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0155416838824749, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.010552067309617996, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.010534385219216347, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.010079910047352314, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006960412487387657, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.017225392162799835, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0155416838824749, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.43.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12025966495275497, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1125037893652916, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10914021730422974, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09882662445306778, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05639393627643585, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05315909907221794, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06471900641918182, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.059631362557411194, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05724808946251869, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.050550010055303574, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04813851788640022, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.032894302159547806, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02854786440730095, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02706918679177761, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026716310530900955, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01646880805492401, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013956797309219837, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013804184272885323, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012772112153470516, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012543988414108753, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008645243011415005, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008605130016803741, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00817081332206726, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005675255320966244, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01646880805492401, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01646880805492401, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.43.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24289213120937347, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22769193351268768, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22272703051567078, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20187030732631683, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11422920972108841, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10861292481422424, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12798966467380524, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11789927631616592, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11589916795492172, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1024504229426384, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09728379547595978, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06511412560939789, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05633075162768364, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.054624829441308975, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.054227519780397415, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032487835735082626, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02777363359928131, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02763729728758335, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025325393304228783, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025066561996936798, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016742520034313202, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016303852200508118, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01610926166176796, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010085842572152615, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016742520034313202, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01610926166176796, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.43.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10960536450147629, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09933222830295563, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09620630741119385, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08303927630186081, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.051750559359788895, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04808565974235535, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05851433426141739, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.053661733865737915, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05276104062795639, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04370451346039772, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03997272625565529, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030282212421298027, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0262807197868824, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02540634758770466, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025193173438310623, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01538042537868023, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013952533714473248, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01389557495713234, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012464753352105618, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012335953302681446, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008817407302558422, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009629418142139912, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008556920103728771, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.007694134954363108, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01538042537868023, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01538042537868023, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.43.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18113332986831665, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17007169127464294, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16660332679748535, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15124286711215973, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08551057428121567, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08149159699678421, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09514171630144119, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08790374547243118, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0867159441113472, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07687713205814362, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07295960932970047, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.048611413687467575, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04218611493706703, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04109692946076393, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04083625599741936, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024336006492376328, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021328626200556755, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021254731342196465, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0195876844227314, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01942794770002365, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012924771755933762, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013157734647393227, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01257503591477871, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009071940556168556, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012924771755933762, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012924771755933762, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.43.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22526343166828156, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21159711480140686, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20730289816856384, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18821080029010773, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10613292455673218, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10114527493715286, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11797314882278442, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10903119295835495, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10761401057243347, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09542069584131241, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09051579982042313, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06006579473614693, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.052130475640296936, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.050799112766981125, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05048911273479462, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029999660328030586, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025912944227457047, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02582189440727234, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02369672991335392, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023493044078350067, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015563811175525188, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015298404730856419, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015121078118681908, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009686905890703201, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015563811175525188, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015121078118681908, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.43.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2322414666414261, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.1996787190437317, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1844584047794342, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1622321754693985, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10629647225141525, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09189546853303909, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13295291364192963, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12133324891328812, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11130344122648239, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08799153566360474, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08378373831510544, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06806023418903351, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.058572154492139816, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0517527312040329, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05003675818443298, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.0344599112868309, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02782665193080902, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027165452018380165, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024343429133296013, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023213529959321022, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019095197319984436, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019285842776298523, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016967443749308586, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014095871709287167, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016967443749308586, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016967443749308586, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.44.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13934488594532013, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.13078579306602478, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12762334942817688, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11584343761205673, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06546579301357269, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06214926019310951, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07371369004249573, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06810048967599869, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06641770899295807, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.058872152119874954, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05591917037963867, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0374579057097435, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03255876153707504, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03136177361011505, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.031079772859811783, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018720494583249092, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.016039317473769188, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015932345762848854, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014677220024168491, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014495200477540493, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009743371978402138, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009613323956727982, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.009346763603389263, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00614817813038826, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.016039317473769188, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015932345762848854, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.44.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1183353066444397, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1109529659152031, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10816966742277145, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09814935177564621, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.055553268641233444, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.052643418312072754, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06292758136987686, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0580156035721302, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05633578822016716, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04992137476801872, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.047534506767988205, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03197912871837616, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027718033641576767, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026601210236549377, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026334036141633987, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01598307117819786, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013601644895970821, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013498453423380852, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012447330169379711, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01227711234241724, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00829849299043417, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008162453770637512, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007924143224954605, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005199673119932413, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01598307117819786, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01598307117819786, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.44.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23929229378700256, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22478224337100983, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22010795772075653, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19988848268985748, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11257348954677582, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10723131895065308, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1255996823310852, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11591693013906479, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11413287371397018, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1012277826666832, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09618005156517029, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06385248154401779, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055366870015859604, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053813088685274124, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05346031114459038, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03186303377151489, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02735300548374653, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027235835790634155, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0250058863312006, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024765849113464355, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016404282301664352, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01602363958954811, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01584745943546295, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009903398342430592, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016404282301664352, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01584745943546295, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.44.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1209457665681839, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10830813646316528, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10423799604177475, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0865473747253418, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05700770020484924, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05231214314699173, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06524324417114258, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05981620401144028, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.058523520827293396, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.047119610011577606, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04145648702979088, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.033648259937763214, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029109057039022446, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02781737968325615, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027512190863490105, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01693020947277546, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015013856813311577, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014939249493181705, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013135213404893875, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012941460125148296, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009387017227709293, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.010110298171639442, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008992246352136135, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.007750003598630428, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01693020947277546, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014939249493181705, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.44.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17128661274909973, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16096092760562897, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15773209929466248, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14332014322280884, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08084895461797714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07710666954517365, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09005089849233627, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08306849002838135, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08196444064378738, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07278653979301453, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0691356435418129, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.046045731753110886, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.039867110550403595, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0388571098446846, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03861609101295471, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023059505969285965, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02017088420689106, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020106256008148193, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01854681223630905, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01839601993560791, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012305901385843754, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012438828125596046, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011980198323726654, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008586150594055653, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01854681223630905, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01839601993560791, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.44.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22321823239326477, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.209779292345047, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20561273396015167, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18684755265712738, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10510867089033127, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10025941580533981, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11707369983196259, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10796093195676804, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.106581911444664, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09462890774011612, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08995801210403442, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.059669241309165955, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05163796618580818, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05033712461590767, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05002598464488983, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029848378151655197, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025739941745996475, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025657426565885544, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02358323708176613, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023389901965856552, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015663687139749527, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015300828032195568, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015239914879202843, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009840210899710655, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015663687139749527, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015239914879202843, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.44.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23125262558460236, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19940757751464844, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18397904932498932, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16290755569934845, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1058003157377243, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09140490740537643, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13294892013072968, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12136925011873245, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11091619729995728, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08823858201503754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08426148444414139, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06784304976463318, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05858982726931572, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.051478222012519836, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.049665696918964386, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.0341668501496315, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027610300108790398, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02691129967570305, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024258678779006004, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02307838946580887, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018543122336268425, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01911655068397522, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016259009018540382, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013852867297828197, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018543122336268425, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016259009018540382, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.45.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12259984016418457, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11496540904045105, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11216921359300613, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10174545645713806, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05754480138421059, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05454593896865845, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06481193751096725, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.059982094913721085, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.058374084532260895, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.051718369126319885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.049145154654979706, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03294934332370758, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02868136577308178, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02756034955382347, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02729206159710884, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016478504985570908, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014139559119939804, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01403828989714384, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012952553108334541, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012785373255610466, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00858929380774498, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00854896567761898, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008221689611673355, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005546035245060921, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016478504985570908, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016478504985570908, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.45.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10185196995735168, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0955251008272171, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09297093003988266, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08439912647008896, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04779446870088577, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.045206598937511444, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.054444365203380585, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05018410086631775, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04849548637866974, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04299512505531311, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04096114635467529, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027670517563819885, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023994654417037964, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02290472947061062, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022650249302387238, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013839596882462502, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011741391383111477, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011632714420557022, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010758628137409687, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010591650381684303, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0072108181193470955, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007112369406968355, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0068537029437720776, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004577901680022478, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013839596882462502, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013839596882462502, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.45.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2356545627117157, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22134914994239807, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21669600903987885, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19679132103919983, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.110741026699543, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10544806718826294, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12369951605796814, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11405888944864273, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1122923344373703, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09961544722318649, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09469720721244812, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06283117830753326, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05445517599582672, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05294127017259598, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05258392170071602, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03135917708277702, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026899214833974838, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02678517997264862, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02458963729441166, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02435648813843727, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016136083751916885, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015759769827127457, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015571706928312778, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009733769111335278, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016136083751916885, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015571706928312778, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.45.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.17372769117355347, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1411539614200592, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12801998853683472, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10459308326244354, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.07960963249206543, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06622494012117386, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09778088331222534, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.08985491096973419, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08370260149240494, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05810905992984772, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05504608154296875, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05050300806760788, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04360079765319824, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.038965094834566116, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03780164197087288, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.025478266179561615, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.021265890449285507, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02087884582579136, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017351601272821426, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.016570471227169037, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014171158894896507, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015015550889074802, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012761675752699375, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011437063105404377, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017351601272821426, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015015550889074802, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.45.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1742001175880432, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16380445659160614, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1605546772480011, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14590808749198914, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08215473592281342, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07837902009487152, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.091237373650074, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08436256647109985, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08327502012252808, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.073971688747406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07028114050626755, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04657416045665741, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.040450409054756165, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0394430086016655, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0391998328268528, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023294948041439056, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020404644310474396, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02033967524766922, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018755875527858734, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018605109304189682, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012285981327295303, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012483896687626839, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011957844719290733, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008499667048454285, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018605109304189682, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018605109304189682, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.45.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2233467400074005, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21000301837921143, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20590709149837494, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1872074007987976, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10508596897125244, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10029315948486328, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11674883961677551, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10790201276540756, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10657419264316559, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09470218420028687, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08994722366333008, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.059443555772304535, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.051581598818302155, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05030148848891258, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04999905452132225, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029673151671886444, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02564479596912861, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025560669600963593, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023486565798521042, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02329697646200657, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015388762578368187, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015112154185771942, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014971147291362286, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009546641260385513, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015388762578368187, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014971147291362286, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.45.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23456227779388428, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20319029688835144, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18775461614131927, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16622845828533173, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10744620859622955, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09312936663627625, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1350679248571396, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12323148548603058, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11249580979347229, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08993399888277054, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08578462898731232, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06892164051532745, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05942472070455551, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05234013497829437, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.050539687275886536, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.035183604806661606, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02816789224743843, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02746756002306938, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02483833208680153, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023665105924010277, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019939031451940536, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01958346925675869, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017829982563853264, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014308927580714226, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017829982563853264, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017829982563853264, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.46.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11917643994092941, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11190671473741531, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10911742597818375, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0991225391626358, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.055891167372465134, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05302150174975395, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06331457197666168, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.058313023298978806, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.056679755449295044, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05033576488494873, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04798213019967079, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03219745680689812, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02789314277470112, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026786666363477707, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02652461640536785, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016124282032251358, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013730007223784924, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013628995046019554, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012589053250849247, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012419586069881916, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008433764800429344, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008286288939416409, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00806315615773201, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005346961319446564, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016124282032251358, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016124282032251358, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.46.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10105142742395401, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09484527260065079, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09239739179611206, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08395207673311234, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.047402046620845795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04493481665849686, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05380070209503174, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04960048571228981, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.048096802085638046, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04269663244485855, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04068206995725632, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027351485565304756, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02372068539261818, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022708136588335037, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022465836256742477, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013668196275830269, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01161467656493187, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011517849750816822, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010640988126397133, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010490331798791885, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0071036010049283504, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006986364722251892, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0067688412964344025, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004454056732356548, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013668196275830269, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013668196275830269, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.46.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2300521284341812, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21615561842918396, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21164998412132263, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19237397611141205, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10807615518569946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10294654220342636, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12062443792819977, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11131832748651505, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10955314338207245, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09731199592351913, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09251977503299713, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06133360043168068, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.053134892135858536, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.051643356680870056, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05129704996943474, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030587639659643173, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02622479386627674, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026115862652659416, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024002298712730408, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023764070123434067, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015732916072010994, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015346790663897991, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015186429023742676, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009454440325498581, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015732916072010994, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015186429023742676, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.46.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19269797205924988, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15603803098201752, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14360803365707397, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1181802898645401, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0870664119720459, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0718228816986084, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10639478266239166, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0957486554980278, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09131411463022232, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06545662879943848, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0585625097155571, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.054743412882089615, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04739649221301079, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04351545870304108, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.042565129697322845, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028765758499503136, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024985874071717262, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024729246273636818, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02132311649620533, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020734241232275963, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017782211303710938, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018713725730776787, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016783609986305237, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015554094687104225, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017782211303710938, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015554094687104225, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.46.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17916005849838257, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16851550340652466, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16522298753261566, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.150193452835083, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08451884984970093, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0806744247674942, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0939508005976677, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08683698624372482, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08569163829088211, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07616621255874634, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07242997735738754, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04796968027949333, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.041635435074567795, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04059811308979988, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.040356751531362534, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024001609534025192, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021017977967858315, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02094772271811962, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019332483410835266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019177157431840897, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012690230272710323, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012874185107648373, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012355444952845573, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008785253390669823, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012690230272710323, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012690230272710323, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.46.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22754602134227753, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21408721804618835, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20993103086948395, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19091811776161194, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10714030265808105, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10227932035923004, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11910299211740494, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10998281091451645, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10860344022512436, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09659985452890396, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09199465066194534, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06072909012436867, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.052627380937337875, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05132591351866722, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05102672800421715, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030368143692612648, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026311088353395462, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026225078850984573, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024148577824234962, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0239543654024601, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015928326174616814, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01571153849363327, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01550179161131382, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010231668129563332, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015928326174616814, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01550179161131382, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.46.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2405695915222168, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20936214923858643, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19435398280620575, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1725614219903946, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1102147027850151, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09618953615427017, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13660137355327606, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12562696635723114, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11520570516586304, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09278374165296555, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08837547898292542, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06981955468654633, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06037560850381851, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05335609242320061, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.051590561866760254, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03513925522565842, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02811664715409279, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027423959225416183, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024680692702531815, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023509085178375244, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018947230651974678, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018829818814992905, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016730191186070442, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012990051880478859, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016730191186070442, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016730191186070442, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.47.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1181984469294548, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11095259338617325, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10825604200363159, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09827768802642822, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05543952435255051, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.052636146545410156, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06255175173282623, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05775545537471771, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05621255561709404, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04991161823272705, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.047485459595918655, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03179461508989334, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027614427730441093, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026550743728876114, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026294725015759468, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01589568890631199, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013598397374153137, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013498259708285332, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012464744038879871, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012301462702453136, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008294343017041683, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008187009952962399, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007944440469145775, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005269859451800585, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01589568890631199, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01589568890631199, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.47.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09929540753364563, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09320428967475891, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09077499061822891, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08247297257184982, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.046579740941524506, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04410388693213463, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05298452451825142, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.048804398626089096, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04724673181772232, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04194019362330437, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03999074548482895, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026915602385997772, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023324279114603996, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022304125130176544, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02206646092236042, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0134514095261693, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01140614040195942, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0113052474334836, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010454024188220501, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010300633497536182, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006990533322095871, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006861821748316288, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006654930301010609, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004362164065241814, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0134514095261693, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0134514095261693, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.47.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2340649515390396, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21993763744831085, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21544800698757172, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1957995444536209, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1099180057644844, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10482072830200195, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12258288264274597, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11313682794570923, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11146396398544312, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09898482263088226, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09410052001476288, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06228701397776604, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05399949103593826, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05251924693584442, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05217960849404335, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031068284064531326, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026683257892727852, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026572398841381073, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024413466453552246, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024186480790376663, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01595897413790226, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015588641166687012, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015418498776853085, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009610029868781567, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01595897413790226, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015418498776853085, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.47.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2084844559431076, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1571388840675354, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13718199729919434, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11884059756994247, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09374295920133591, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0695156529545784, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11670920997858047, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10712435096502304, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10053464770317078, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06655417382717133, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06406570225954056, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.060158174484968185, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.051629580557346344, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04552682861685753, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0439763180911541, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030203096568584442, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024157077074050903, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02371606044471264, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018986620008945465, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.017860477790236473, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016368165612220764, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01643448695540428, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014456972479820251, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011594298295676708, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.017860477790236473, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016368165612220764, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.47.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18187454342842102, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17105746269226074, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16771644353866577, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15256217122077942, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08577141910791397, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0818861797451973, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09543228894472122, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08807161450386047, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08697163313627243, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07734427601099014, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07363877445459366, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.048740170896053314, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.042227908968925476, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04118812456727028, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04094695299863815, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02439195103943348, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021319091320037842, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021248335018754005, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019612301141023636, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019457893446087837, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012920759618282318, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013056444004178047, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012584883719682693, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008906790986657143, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012920759618282318, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012920759618282318, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.47.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23108051717281342, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2174788862466812, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21328496932983398, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19397246837615967, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10878264158964157, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10387348383665085, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12086523324251175, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11164651066064835, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11024627834558487, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0981174185872078, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09337682276964188, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06153152510523796, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05339035019278526, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.052090033888816833, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05178278684616089, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030767690390348434, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02663975954055786, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02655244804918766, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02444484829902649, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0242448877543211, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016049092635512352, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015822188928723335, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015622671693563461, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010176670737564564, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016049092635512352, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015622671693563461, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.47.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2413918375968933, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21071627736091614, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19516970217227936, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17331291735172272, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11059052497148514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09647081047296524, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13818636536598206, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12696067988872528, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11552731692790985, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0933765321969986, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08910470455884933, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07037005573511124, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06090995669364929, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05348828062415123, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05161026865243912, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03528861328959465, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028113460168242455, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02735595963895321, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024721885100007057, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023481275886297226, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018776386976242065, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018813330680131912, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01638597436249256, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012831981293857098, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01638597436249256, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01638597436249256, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.48.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11599404364824295, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10892751067876816, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10621751844882965, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0965506061911583, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05440580099821091, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0516066774725914, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06160609424114227, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05676523223519325, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05517838895320892, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04898825287818909, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04673391953110695, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03131258115172386, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027133343741297722, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026055337861180305, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02579336054623127, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01565970852971077, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013333633542060852, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013231226243078709, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012217639945447445, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012052023783326149, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008160036988556385, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008001210168004036, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00780084915459156, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005109121557325125, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01565970852971077, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01565970852971077, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.48.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09919755905866623, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09315603971481323, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09077726304531097, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08253806829452515, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04651214927434921, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04407000169157982, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05313216149806976, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04871675372123718, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.047182511538267136, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.041906509548425674, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.040089890360832214, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026971057057380676, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023302434012293816, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02229529246687889, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022060465067625046, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01348840445280075, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0114141795784235, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011315273120999336, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010467696003615856, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010317292995750904, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007036631461232901, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006882053334265947, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006711540278047323, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004404385574162006, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01348840445280075, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01348840445280075, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.48.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23511001467704773, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2209336757659912, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2164318859577179, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19675269722938538, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11049792170524597, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10531587153673172, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12325990200042725, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1137215793132782, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11203239113092422, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09950055181980133, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0946679413318634, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06267403811216354, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05433798208832741, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.052841946482658386, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.052482783794403076, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03129029646515846, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026917915791273117, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026810297742486, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024646250531077385, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02441740222275257, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016158655285835266, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01586371660232544, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015611949376761913, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009968125261366367, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016158655285835266, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015611949376761913, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.48.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18274088203907013, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14846265316009521, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13482888042926788, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10929558426141739, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08353680372238159, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06872089207172394, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10357331484556198, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09435100108385086, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08819354325532913, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.061624594032764435, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05693338066339493, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05338966101408005, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04596457630395889, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.041067853569984436, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03984332084655762, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.026866108179092407, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022572103887796402, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022175582125782967, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018604949116706848, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.017789624631404877, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014954451471567154, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016123438253998756, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013420931994915009, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012465070933103561, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018604949116706848, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014954451471567154, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.48.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18634240329265594, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17530490458011627, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17183664441108704, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15639345347881317, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0879073292016983, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08392053097486496, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09774117171764374, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09025103598833084, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08909736573696136, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07924486696720123, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07546353340148926, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04990331083536148, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04324343800544739, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04217619076371193, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04192332550883293, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02495064213871956, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021772660315036774, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021701648831367493, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02001885324716568, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019857730716466904, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013180588372051716, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013261686079204082, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012835538014769554, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008953073993325233, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013180588372051716, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013180588372051716, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.48.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23306666314601898, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21926797926425934, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21500691771507263, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19563916325569153, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10966932773590088, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10473409295082092, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12188202887773514, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11256742477416992, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1111593171954155, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09886881709098816, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09412510693073273, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06203341856598854, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05379488691687584, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.052472494542598724, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.052169665694236755, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03099343739449978, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026762312278151512, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026677139103412628, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024533314630389214, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024331988766789436, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016148578375577927, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015785180032253265, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015717465430498123, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009997888468205929, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016148578375577927, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015717465430498123, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.48.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2411876916885376, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21094472706317902, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19590911269187927, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17399784922599792, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11080813407897949, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09694437682628632, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13761623203754425, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12650373578071594, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11567328870296478, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09378894418478012, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08943215012550354, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07049298286437988, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.061044901609420776, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05382541939616203, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05198372155427933, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03565814718604088, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02866068109869957, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027942832559347153, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.0253659226000309, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024191711097955704, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019292738288640976, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019566547125577927, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01696702651679516, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01394944079220295, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01696702651679516, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01696702651679516, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.49.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12762226164340973, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11991104483604431, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11707112193107605, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1063738763332367, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05991090461611748, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05690747871994972, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06734810024499893, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06225455924868584, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06075561046600342, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.053920332342386246, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05124577879905701, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03418028727173805, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02973218820989132, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.028671469539403915, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.028415199369192123, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01706632599234581, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014625747688114643, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014529477804899216, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013389608822762966, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013232643716037273, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008830830454826355, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008708419278264046, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008476637303829193, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0054993582889437675, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01706632599234581, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01706632599234581, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.49.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10726355016231537, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10077328979969025, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09831348061561584, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08931911736726761, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.050377219915390015, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04780964553356171, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.057017434388399124, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0525023378431797, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.051079269498586655, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.045375365763902664, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04324471950531006, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028973402455449104, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025093629956245422, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02411963790655136, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023890111595392227, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014478953555226326, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012330002151429653, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012238179333508015, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011302437633275986, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011157112196087837, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007521611638367176, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007381386589258909, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007196606602519751, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004693922586739063, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014478953555226326, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014478953555226326, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.49.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2406245768070221, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22617316246032715, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22154219448566437, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20152482390403748, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11306962370872498, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10786706954240799, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12590935826301575, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11627944558858871, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11466529965400696, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10188720375299454, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09686227887868881, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06402506679296494, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05552313104271889, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05405450612306595, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05371773988008499, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03193596750497818, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027466868981719017, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02736331708729267, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02513773739337921, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02491425722837448, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016448531299829483, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01607983373105526, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015914244577288628, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009957283735275269, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016448531299829483, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015914244577288628, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.49.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.182991161942482, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14206552505493164, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12435398250818253, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10189071297645569, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08031728863716125, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06274077296257019, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10460730642080307, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09639992564916611, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08809839189052582, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.060229234397411346, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.056119415909051895, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05401802808046341, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.046464670449495316, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03908570483326912, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.037165626883506775, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02720833197236061, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.020861081779003143, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0202929750084877, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01705050654709339, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01571105420589447, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01480699423700571, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014669090509414673, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012491761706769466, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010274027474224567, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01705050654709339, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01571105420589447, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.49.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18831610679626465, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17705509066581726, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17357544600963593, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15788733959197998, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08883512020111084, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08478587865829468, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09886693954467773, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09122316539287567, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09007640182971954, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.080046147108078, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07611832022666931, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.050500549376010895, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04372011125087738, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04263833165168762, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04238462448120117, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025286441668868065, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02201559767127037, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021942323073744774, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020232893526554108, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020070256665349007, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013438588008284569, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013404203578829765, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013089198619127274, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009053342044353485, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013438588008284569, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013438588008284569, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.49.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2349271923303604, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22095845639705658, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21665088832378387, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19704043865203857, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11058443039655685, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10557197779417038, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12286420166492462, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11351064592599869, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11208606511354446, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0996607095003128, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09477486461400986, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06254502385854721, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.054239869117736816, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.052907370030879974, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0525994636118412, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031234076246619225, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026967478916049004, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0268806591629982, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02470625378191471, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024509228765964508, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016226962208747864, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01588425785303116, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015788691118359566, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01002148725092411, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016226962208747864, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015788691118359566, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.49.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24462658166885376, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21422113478183746, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19953961670398712, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17706622183322906, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11228664219379425, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09865207225084305, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1385851353406906, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1275375336408615, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11706870794296265, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09497826546430588, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09031978249549866, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07065334916114807, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06115860491991043, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.054192814975976944, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.052444856613874435, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03544193133711815, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028272708877921104, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027574852108955383, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024817101657390594, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023645736277103424, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01878691464662552, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01855725608766079, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016537372022867203, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012365030124783516, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01855725608766079, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016537372022867203, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.50.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12412293255329132, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11650552600622177, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11370720714330673, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1032535582780838, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05827533081173897, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.055276352912187576, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06572437286376953, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06072704493999481, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05910484492778778, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05244334787130356, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.049860503524541855, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03342382609844208, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029050076380372047, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027913859114050865, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02763378992676735, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01671898551285267, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014287238009274006, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014181249774992466, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013089568354189396, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012917179614305496, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008721057325601578, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008596448227763176, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008347713388502598, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005518051795661449, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01671898551285267, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01671898551285267, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.50.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1044597327709198, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09807375818490982, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09558120369911194, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08684659749269485, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04901796579360962, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04646854102611542, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.055620960891246796, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05126378312706947, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04972176253795624, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04414193704724312, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0420248918235302, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028236163780093193, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024494720622897148, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023472290486097336, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023233067244291306, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014117390848696232, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012000849470496178, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011899631470441818, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010992531664669514, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01083458587527275, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007327245082706213, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007201799191534519, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0069849626161158085, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0045707649551332, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014117390848696232, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014117390848696232, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.50.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23693083226680756, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22254280745983124, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21794916689395905, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19804978370666504, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11127692461013794, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10603819787502289, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12417341768741608, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11454146355390549, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11282121390104294, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10015829652547836, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09523728489875793, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06304892152547836, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05466843023896217, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053174976259469986, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05281946808099747, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03145495802164078, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0269789956510067, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02686743065714836, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024663448333740234, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024438468739390373, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016159120947122574, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01573544554412365, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015602584928274155, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009642972610890865, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016159120947122574, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015602584928274155, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.50.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21880726516246796, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14317819476127625, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11484987288713455, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10113023966550827, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09079655259847641, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.062120288610458374, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1264253556728363, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11060956120491028, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10147468000650406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06196827441453934, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06202974542975426, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0631440058350563, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05599315091967583, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04728827625513077, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.044951148331165314, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03355266898870468, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.029784871265292168, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.029249127954244614, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025632742792367935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02432185411453247, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.021003693342208862, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.024915236979722977, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01862872950732708, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.021794121712446213, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01862872950732708, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01862872950732708, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.50.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19180619716644287, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18040621280670166, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17686045169830322, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16087673604488373, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09056145697832108, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08644016087055206, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10088644176721573, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09297065436840057, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09181246161460876, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08161767572164536, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07766184210777283, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.051619600504636765, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04461776837706566, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.043515514582395554, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04325239360332489, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025890523567795753, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02255096286535263, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022472839802503586, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02074742689728737, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02058415859937668, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013891675509512424, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013843996450304985, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01354163233190775, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009496002458035946, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013891675509512424, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013891675509512424, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.50.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2381131947040558, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22401867806911469, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21969753503799438, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19981737434864044, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11213212460279465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1070815846323967, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1250394731760025, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11512129753828049, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11368267983198166, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10110427439212799, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09628506004810333, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06366011500358582, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05510563775897026, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.053756874054670334, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05343318730592728, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03190077841281891, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027535803616046906, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027446232736110687, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025264669209718704, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025065764784812927, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01685675047338009, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01642049103975296, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016417669132351875, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01066453754901886, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01685675047338009, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016417669132351875, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.50.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25050023198127747, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2201324701309204, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2055910974740982, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18222540616989136, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11536186933517456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10174310952425003, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14183886349201202, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13035716116428375, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.120144784450531, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09779811650514603, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09292007982730865, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07250183820724487, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06276052445173264, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0559108667075634, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05418917536735535, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03654977306723595, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029545661062002182, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028874091804027557, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026103265583515167, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02497538924217224, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01959388703107834, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019805731251835823, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017341023311018944, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013861604034900665, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017341023311018944, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017341023311018944, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.51.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12993112206459045, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1220599040389061, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11917950958013535, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10824425518512726, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.061042964458465576, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05800480768084526, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06859762221574783, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06341374665498734, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06189858913421631, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05494818463921547, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0521610789000988, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03481854870915413, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.030283687636256218, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02921670489013195, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02895139530301094, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01739605888724327, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014889616519212723, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01479348260909319, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013624262996017933, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013459979556500912, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008996384218335152, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008838653564453125, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008634951896965504, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00554241007193923, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01739605888724327, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01739605888724327, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.51.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1107402890920639, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10403358936309814, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10153685510158539, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09220629185438156, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05201612412929535, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.049393460154533386, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.058787669986486435, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05417414754629135, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.052751123905181885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04684765636920929, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04457793012261391, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029877036809921265, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0258918646723032, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02491219900548458, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024676619097590446, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014925806783139706, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012705535627901554, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012615625746548176, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011640745215117931, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011491735465824604, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007740585133433342, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007579631172120571, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00741309579461813, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004776841029524803, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014925806783139706, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014925806783139706, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.51.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24107427895069122, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22658364474773407, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22205550968647003, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20172327756881714, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11332295089960098, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10802585631608963, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12619957327842712, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11655702441930771, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1148928552865982, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10203848034143448, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09687815606594086, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06410852819681168, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05563564598560333, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05415591597557068, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.053798798471689224, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031977176666259766, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027475079521536827, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02736259624361992, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025124365463852882, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024894632399082184, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016428660601377487, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01600726507604122, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015889136120676994, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009801134467124939, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016428660601377487, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015889136120676994, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.51.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20333753526210785, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.147175595164299, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1212652176618576, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09957876801490784, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0882270410656929, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06455285847187042, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11830329895019531, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10892398655414581, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09799353778362274, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.060086023062467575, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06006962060928345, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.060643620789051056, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05238451808691025, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.042955994606018066, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.040446337312459946, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03039741702377796, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02285095863044262, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0220719613134861, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017535081133246422, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015641851350665092, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016199404373764992, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016223495826125145, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013186173513531685, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011087354272603989, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017535081133246422, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015641851350665092, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.51.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19440382719039917, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18277665972709656, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17920994758605957, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16285522282123566, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09179772436618805, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08757317811250687, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1023297905921936, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09426575154066086, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09306053072214127, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08270902931690216, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07872792333364487, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0523030124604702, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.045193467289209366, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04406534880399704, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04380100220441818, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.026218345388770103, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022782940417528152, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02270425483584404, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020938225090503693, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020772593095898628, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013988735154271126, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013916568830609322, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013630617409944534, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009452286176383495, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013988735154271126, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013988735154271126, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.51.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23837681114673615, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2242334932088852, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21981707215309143, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19980235397815704, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11226938664913177, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10714659839868546, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12486275285482407, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11525025218725204, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11382127553224564, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10115663707256317, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09616909176111221, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06360066682100296, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05510614439845085, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05374624952673912, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.053418707102537155, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0317692868411541, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02741088904440403, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02731963060796261, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025123214349150658, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024913635104894638, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016546206548810005, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016188709065318108, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01609547808766365, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010265081189572811, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016546206548810005, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01609547808766365, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.51.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24935168027877808, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2199191451072693, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2051648497581482, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18151798844337463, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11503936350345612, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10154864937067032, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14235253632068634, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13070359826087952, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11963999271392822, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09771288931369781, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09294541925191879, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07302190363407135, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06293556094169617, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05579889938235283, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05400487408041954, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03703048452734947, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029566412791609764, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02883720211684704, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026183733716607094, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025007855147123337, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.02025374211370945, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019959786906838417, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017974555492401123, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014041443355381489, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017974555492401123, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017974555492401123, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.52.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13398799300193787, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12582890689373016, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12281221151351929, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1114611029624939, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06297009438276291, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05978141352534294, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07091907411813736, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06546096503734589, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06385360658168793, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05662335455417633, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0537608377635479, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03601790592074394, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.031280215829610825, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.030139999464154243, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.029870878905057907, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018003402277827263, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015385028906166553, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01528235338628292, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014077028259634972, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01390310563147068, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00935332290828228, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00917021557688713, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008967181667685509, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005786549765616655, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018003402277827263, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01528235338628292, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.52.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11403048038482666, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10702662169933319, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10441344976425171, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09484747052192688, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05360507220029831, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.050859350711107254, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06067626550793648, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.055844731628894806, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05435645952820778, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.048207562416791916, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04593203216791153, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030857522040605545, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02669309824705124, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025668315589427948, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025424279272556305, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015416901558637619, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013109581544995308, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013014757074415684, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012003306299448013, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011848686262965202, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008017225190997124, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007836362347006798, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0076718516647815704, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004960366524755955, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015416901558637619, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015416901558637619, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.52.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24479615688323975, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22997352480888367, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22525599598884583, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20451733469963074, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11500340700149536, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10964929312467575, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1280754655599594, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11829082667827606, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11660197377204895, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10351233929395676, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0983034297823906, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.065087229013443, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05645580217242241, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05497569963335991, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05462414771318436, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032462313771247864, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027911968529224396, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027807166799902916, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025513818487524986, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02528098225593567, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016697227954864502, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016301702708005905, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016151530668139458, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010035530664026737, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016697227954864502, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016151530668139458, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.52.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23037052154541016, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1672797054052353, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14075101912021637, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11698785424232483, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10278269648551941, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07567248493432999, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1312011033296585, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12039995938539505, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11112652719020844, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06632336974143982, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0667273998260498, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06745216995477676, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05798976123332977, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04999377578496933, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.047947607934474945, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03382580354809761, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02665516920387745, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02602282166481018, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0198625810444355, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.018283315002918243, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018253512680530548, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01847974769771099, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015730947256088257, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013071703724563122, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.018283315002918243, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015730947256088257, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.52.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19050832092761993, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1791398674249649, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17557604610919952, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1595693677663803, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08996555954217911, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08581894636154175, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10015634447336197, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0924263671040535, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09123071283102036, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08103923499584198, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07704494148492813, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.051186703145504, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04430908337235451, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04319549724459648, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04293486848473549, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025615880265831947, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02232413925230503, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022243959829211235, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020506327971816063, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02034030668437481, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013549796305596828, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013617226853966713, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013188619166612625, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00921675842255354, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013549796305596828, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013549796305596828, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.52.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23888519406318665, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22469662129878998, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22024445235729218, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20019209384918213, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11260364204645157, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10745814442634583, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12538418173789978, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11561550945043564, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11414949595928192, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10142558813095093, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09639319032430649, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06395650655031204, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05532681196928024, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0539519265294075, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05362887308001518, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031957779079675674, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02761686220765114, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02752208709716797, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02531462535262108, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025110386312007904, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016744127497076988, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016454672440886497, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016293145716190338, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010640239343047142, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016744127497076988, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016293145716190338, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.52.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25151243805885315, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2219928354024887, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20791545510292053, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1840672791004181, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11618246883153915, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10291209816932678, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1420716643333435, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13080798089504242, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12071295827627182, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0987253487110138, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09353820234537125, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07258820533752441, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0629701167345047, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.056238818913698196, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05456387251615524, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03653755784034729, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029626348987221718, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02895999699831009, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026181908324360847, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02507897838950157, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019590597599744797, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019704433158040047, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01743718422949314, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013660292141139507, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01743718422949314, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01743718422949314, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.53.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1313660591840744, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12332892417907715, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12028224766254425, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10907348245382309, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0617738701403141, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05858341231942177, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06945683807134628, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06427158415317535, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06265472620725632, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05548376962542534, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05263412371277809, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03526479750871658, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.030700908973813057, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02957850880920887, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.029304364696145058, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01762796752154827, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015097660943865776, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014996341429650784, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013799430802464485, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013629144988954067, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009140346199274063, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009006625041365623, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008763773366808891, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005700242705643177, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01762796752154827, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014996341429650784, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.53.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11147060990333557, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10458528995513916, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10198965668678284, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09250637143850327, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.052367474883794785, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.049658238887786865, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.059260234236717224, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05462457984685898, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05313289538025856, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04707273095846176, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0447634756565094, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030103523284196854, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026104778051376343, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025088131427764893, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02484292723238468, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015045003965497017, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012823558412492275, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012726925313472748, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01172716449946165, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01157141663134098, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007822721265256405, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007682837545871735, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007482848595827818, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004881821107119322, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015045003965497017, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015045003965497017, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.53.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24575050175189972, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2307496815919876, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22607596218585968, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.2051277756690979, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11557448655366898, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11013182252645493, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1286666989326477, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11885838210582733, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11718761175870895, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10393030196428299, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09862056374549866, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06537329405546188, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.056737057864665985, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05522496998310089, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.054872702807188034, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032609518617391586, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028010431677103043, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027901552617549896, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0255765151232481, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02534567005932331, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016741300001740456, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016320642083883286, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01618815027177334, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00998428463935852, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016741300001740456, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01618815027177334, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.53.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1946033388376236, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15138599276542664, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13021409511566162, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11347018927335739, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08917999267578125, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06875213235616684, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11492966115474701, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1056978777050972, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09394114464521408, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06561337411403656, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06138501688838005, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.059270963072776794, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05104605108499527, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.043503981083631516, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04157213121652603, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029717853292822838, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023342080414295197, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02255590818822384, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01892033778131008, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.017557306215167046, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016029683873057365, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016444038599729538, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013631829991936684, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011762198060750961, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.017557306215167046, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016029683873057365, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.53.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1900850087404251, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17852787673473358, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17494457960128784, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1588643193244934, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08980391919612885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08560867607593536, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1000065878033638, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09223786741495132, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09103894978761673, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08077551424503326, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07669053226709366, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.051096003502607346, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04425406455993652, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04312920197844505, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.042868852615356445, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02559741586446762, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02230582945048809, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0222308412194252, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020480824634432793, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020309412851929665, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013587030582129955, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01364078652113676, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013221890665590763, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009280752390623093, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013587030582129955, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013587030582129955, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.53.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2385527789592743, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22420154511928558, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2197454273700714, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19949086010456085, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11246741563081741, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10725944489240646, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1250765323638916, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11549047380685806, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1140277087688446, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10118533670902252, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09603920578956604, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06368014216423035, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05521928519010544, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05383089929819107, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05350808799266815, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031804583966732025, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027454983443021774, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027360806241631508, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025120673701167107, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024910783395171165, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016505571082234383, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016198867931962013, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01604699343442917, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010247080586850643, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016505571082234383, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01604699343442917, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.53.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2566768527030945, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22613011300563812, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2114161103963852, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18688583374023438, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11855719238519669, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10488927364349365, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14650095999240875, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13380445539951324, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12333766371011734, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10053735226392746, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09523116797208786, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07458798587322235, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06449675559997559, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05756499990820885, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.055817872285842896, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03765489161014557, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030587628483772278, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02991427108645439, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.027086354792118073, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025960762053728104, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.020405983552336693, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020672103390097618, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01817174069583416, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01472936850041151, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01817174069583416, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01817174069583416, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.54.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13421282172203064, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12586283683776855, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12283467501401901, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11132529377937317, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06307265162467957, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.059804923832416534, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07102315872907639, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06562913209199905, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06397107988595963, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05662952736020088, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05370141565799713, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03607051447033882, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03137222304940224, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.030193695798516273, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.029911287128925323, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018014023080468178, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015415667556226254, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015312067233026028, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014083080925047398, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01390930637717247, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009332763962447643, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009204714559018612, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008937768638134003, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005820493679493666, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018014023080468178, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015312067233026028, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.54.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11279582977294922, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10578958690166473, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10311761498451233, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09347452223300934, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0530225895345211, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05023249238729477, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06012817844748497, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05535738542675972, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05379205942153931, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04760439321398735, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04528609663248062, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030553454533219337, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026465578004717827, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025393834337592125, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025136109441518784, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015271632932126522, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01297763828188181, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012875883840024471, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011865136213600636, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011697840876877308, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007931459695100784, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007775242906063795, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0075726862996816635, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004930539987981319, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015271632932126522, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015271632932126522, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.54.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.25189974904060364, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23638762533664703, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23152567446231842, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20987217128276825, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11845774948596954, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.112815260887146, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1319265514612198, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12184321880340576, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.12008875608444214, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10638553649187088, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10084829479455948, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06703154742717743, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.058172017335891724, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05661296844482422, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05624571070075035, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03342638164758682, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028742222115397453, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028633100911974907, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02622970938682556, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025987233966588974, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017183559015393257, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016772164031863213, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01661638915538788, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010301370173692703, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017183559015393257, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01661638915538788, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.54.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20599687099456787, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14761430025100708, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11847630888223648, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10216579586267471, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08872703462839127, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.061769261956214905, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12305903434753418, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11267857253551483, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0991818979382515, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06227165833115578, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0628458708524704, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06342503428459167, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05426806956529617, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.043261051177978516, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.040278881788253784, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031750842928886414, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02308078669011593, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022086363285779953, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01810643821954727, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015902498736977577, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016908463090658188, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016735387966036797, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013304783031344414, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011358237825334072, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01810643821954727, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015902498736977577, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.54.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18738649785518646, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17590630054473877, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17235702276229858, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15654000639915466, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0885472521185875, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08440911769866943, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09861727803945541, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09098048508167267, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08976669609546661, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07961393892765045, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07565833628177643, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05040378123521805, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.043649982661008835, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04254019260406494, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04228076711297035, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025240853428840637, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02201082371175289, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021936023607850075, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020198728889226913, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020037289708852768, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01338431891053915, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013468912802636623, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013020483776926994, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00916790496557951, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01338431891053915, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01338431891053915, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.54.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2366136759519577, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22223861515522003, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21778050065040588, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19781753420829773, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11161933839321136, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1064242273569107, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1242201030254364, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11459694057703018, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1131543219089508, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10037961602210999, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09534254670143127, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06333636492490768, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.054841093719005585, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05346529930830002, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05313662067055702, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03164074942469597, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027328934520483017, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027238570153713226, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025019627064466476, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02480754256248474, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0165229644626379, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016232917085289955, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016070645302534103, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010419589467346668, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0165229644626379, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016070645302534103, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.54.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24362820386886597, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21440091729164124, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19908583164215088, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1760159283876419, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1122903898358345, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0986051857471466, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14039872586727142, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12889614701271057, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11695195734500885, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09529617428779602, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09052573889493942, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07182024419307709, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06189896911382675, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05434873327612877, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05245101451873779, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.036166366189718246, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02857203036546707, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027777252718806267, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025210628286004066, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023944491520524025, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01952969655394554, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019116191193461418, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017129428684711456, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013021563179790974, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017129428684711456, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017129428684711456, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.55.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13089843094348907, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12272458523511887, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11962398141622543, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1084725558757782, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06153648719191551, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.058268919587135315, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06950809806585312, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06421948969364166, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06242486461997032, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0552494116127491, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05245419591665268, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03531181812286377, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03069552220404148, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.029467584565281868, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.029173143208026886, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01765572838485241, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015055571682751179, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014944369904696941, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013762760907411575, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013574567623436451, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009167993441224098, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009019891731441021, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008758153766393661, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005720941815525293, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01765572838485241, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014944369904696941, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.55.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11210402846336365, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10509265959262848, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10242128372192383, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09284713864326477, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05267084389925003, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.049902353435754776, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05964091792702675, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05504481866955757, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05344643071293831, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04730113595724106, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04498233273625374, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03034224733710289, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026327157393097878, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025246333330869675, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02498532086610794, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015162636525928974, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01290833204984665, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012807619757950306, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011797711253166199, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011635108850896358, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007881687954068184, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007748141419142485, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007527896203100681, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004933555144816637, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015162636525928974, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015162636525928974, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.55.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24340221285820007, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22837285697460175, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22366014122962952, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20288261771202087, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1144806295633316, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10900092124938965, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12743790447711945, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11779317259788513, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11607569456100464, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10282087326049805, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09753308445215225, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06477580964565277, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05623979493975639, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.054708246141672134, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05434727296233177, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03231101110577583, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0277450829744339, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027635939419269562, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02531103789806366, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025074316188693047, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016583526507019997, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01614747755229473, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016022155061364174, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009849229827523232, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016583526507019997, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016022155061364174, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.55.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19611844420433044, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15516865253448486, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13835594058036804, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11436127871274948, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0888085886836052, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07120024412870407, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11302564293146133, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10220489650964737, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09372091293334961, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06533736735582352, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06122814863920212, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05783506855368614, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.049795810133218765, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04381367191672325, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04230693355202675, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029481930658221245, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02439379133284092, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023862402886152267, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020256822928786278, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01924072951078415, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017012624070048332, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017848066985607147, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015283660963177681, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013988816179335117, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017012624070048332, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015283660963177681, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.55.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18464379012584686, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17334020137786865, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16978029906749725, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1541074961423874, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0872362032532692, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08311941474676132, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09728626906871796, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08968690037727356, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08848098665475845, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07839015126228333, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07445916533470154, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04972696304321289, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04301929101347923, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.041913289576768875, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.041648466140031815, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02489064447581768, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021700920537114143, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02162732556462288, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019911857321858406, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01974857598543167, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013209610246121883, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01330462284386158, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012849110178649426, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009072341024875641, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013209610246121883, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013209610246121883, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.55.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23378829658031464, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21955466270446777, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2151070535182953, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19522210955619812, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11028103530406952, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1051105484366417, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12293422967195511, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11330003291368484, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11183955520391464, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09914139658212662, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09417744725942612, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0627053827047348, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05425538867712021, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.052872080355882645, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05254671722650528, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031360089778900146, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027116522192955017, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027024338021874428, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02483092062175274, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024623950943350792, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016481652855873108, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016242699697613716, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016029926016926765, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010607699863612652, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016481652855873108, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016029926016926765, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.55.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2551281750202179, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22434547543525696, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.208666130900383, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1841285079717636, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11783258616924286, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10349013656377792, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14770889282226562, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1344762146472931, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12267360091209412, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09972835332155228, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09458944946527481, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0753345638513565, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06478524953126907, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05715065076947212, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.055208027362823486, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03842892125248909, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030229199677705765, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029425809159874916, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02668110467493534, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025396248325705528, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.021414149552583694, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.020403001457452774, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01904868520796299, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014211518689990044, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014211518689990044, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014211518689990044, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.56.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1311531364917755, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1229202151298523, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11978078633546829, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10852022469043732, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06163375824689865, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.058367837220430374, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06960596144199371, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0643884539604187, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0625360980629921, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05527777224779129, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05242513120174408, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0353393629193306, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0307619571685791, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02952619642019272, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.029224863275885582, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017661048099398613, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015100846067070961, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014984850771725178, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013786252588033676, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013598889112472534, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00918613187968731, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009070096537470818, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00877848919481039, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0057805385440588, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017661048099398613, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014984850771725178, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.56.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1089162528514862, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10206468403339386, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09932006150484085, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08996538072824478, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05121038109064102, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04841834679245949, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0582815445959568, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05370534583926201, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05194130167365074, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.045939452946186066, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04366760700941086, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029626348987221718, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025693673640489578, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024555528536438942, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024280041456222534, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01481927651911974, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012586405500769615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012475023046135902, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01150781475007534, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011333449743688107, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007740065921097994, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007624146528542042, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007367439568042755, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004903833847492933, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01481927651911974, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01481927651911974, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.56.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2473827451467514, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23199644684791565, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22717003524303436, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.2059447169303894, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11634979397058487, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11075904965400696, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12976503372192383, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11980091780424118, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11801815778017044, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10447470098733902, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09902278333902359, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06593063473701477, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.057184360921382904, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05561889708042145, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.055250853300094604, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032876402139663696, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028220284730196, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028103642165660858, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025738965719938278, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02549329213798046, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016873441636562347, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016462581232190132, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016293682157993317, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010077808052301407, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016873441636562347, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016293682157993317, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.56.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19586710631847382, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15533721446990967, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13767680525779724, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1037261113524437, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0895284041762352, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07071284204721451, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11186268925666809, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10294438898563385, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09401198476552963, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06230717524886131, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05737612023949623, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05731974169611931, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04940357059240341, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04324131831526756, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.041675835847854614, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028651895001530647, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02263432741165161, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022017916664481163, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017346492037177086, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0161572378128767, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01519252359867096, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015069041401147842, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01319201197475195, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010082696564495564, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017346492037177086, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015069041401147842, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.56.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18455052375793457, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17324768006801605, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1697186678647995, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15408888459205627, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0872083380818367, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08311194181442261, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0971168503165245, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08962738513946533, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08844521641731262, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07836834341287613, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07437614351511002, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.049645423889160156, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04300256446003914, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04189355671405792, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04163364693522453, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02484015002846718, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02168475091457367, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02160961739718914, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019901227205991745, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019732138141989708, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01314451266080141, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01328607089817524, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012781466357409954, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009060297161340714, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01314451266080141, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01314451266080141, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.56.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22539128363132477, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21173174679279327, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2074880748987198, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18827852606773376, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10635194182395935, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10140212625265121, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11856409162282944, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10929031670093536, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10785500705242157, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09560640156269073, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0907677486538887, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.060479652136564255, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0522964745759964, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05096268653869629, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05064428970217705, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030239839106798172, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026084449142217636, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025995459407567978, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023872291669249535, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023669715970754623, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0158598143607378, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015537971630692482, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015426375903189182, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010021300986409187, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0158598143607378, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015426375903189182, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.56.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24820128083229065, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21688605844974518, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20146030187606812, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17751917243003845, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11417459696531296, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10003618896007538, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14192765951156616, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13039377331733704, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11927707493305206, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09618230909109116, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09109050035476685, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07254759967327118, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0626894012093544, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05530036985874176, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05343940109014511, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03645670786499977, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02915338985621929, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028419330716133118, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025620415806770325, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024384867399930954, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01953818090260029, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019554927945137024, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017184335738420486, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013490849174559116, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017184335738420486, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017184335738420486, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.57.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12849822640419006, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12048820406198502, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11736907064914703, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10626639425754547, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.060356490314006805, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05715850368142128, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06818938255310059, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06310004740953445, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.061231233179569244, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.054138731211423874, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05131116509437561, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03461364284157753, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.030145222321152687, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.028908370062708855, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.028616217896342278, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017302237451076508, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01479586772620678, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014677443541586399, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013519005849957466, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0133279450237751, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009004046209156513, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0089016854763031, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008593752048909664, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005687107797712088, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017302237451076508, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017302237451076508, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.57.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10597579926252365, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09930781275033951, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09665822237730026, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08759690076112747, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0497894212603569, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.047103073447942734, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05662519112229347, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05223511904478073, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0505199134349823, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04469478130340576, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04245838522911072, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028751326724886894, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02497474104166031, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023876061663031578, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023609066382050514, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0143875228241086, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012236833572387695, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012125065550208092, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011192257516086102, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011020679026842117, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007513662800192833, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007406352087855339, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007148453500121832, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0047638798132538795, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0143875228241086, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0143875228241086, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.57.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2450798600912094, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22996516525745392, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22516638040542603, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20417667925357819, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11532915383577347, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10980392247438431, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1285412311553955, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11871901154518127, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11697036772966385, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10355068743228912, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0982193723320961, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06536222249269485, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.056688256561756134, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05514279007911682, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05478125810623169, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032600753009319305, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027983175590634346, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027867630124092102, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02552025020122528, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025289934128522873, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01674441061913967, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01633082889020443, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016177568584680557, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010019710287451744, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01674441061913967, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016177568584680557, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.57.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.183274045586586, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1521272510290146, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14217416942119598, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1096462607383728, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08480934053659439, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07267852872610092, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09849521517753601, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09049668163061142, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08817601948976517, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06264105439186096, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0531437061727047, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05064600706100464, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.043505698442459106, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04092917591333389, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04029950127005577, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.025329818949103355, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.021404515951871872, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0212536808103323, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01707879640161991, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.016625037416815758, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013600511476397514, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013690325431525707, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012783128768205643, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009532006457448006, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01707879640161991, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.016625037416815758, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.57.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18179450929164886, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17066188156604767, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.167144313454628, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1517380326986313, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.085908904671669, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08186615258455276, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09561626613140106, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08833402395248413, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08716094493865967, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0772070437669754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07327118515968323, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04880214110016823, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04236192628741264, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04125809296965599, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04100073501467705, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024411434307694435, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02130781300365925, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02123510092496872, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019540585577487946, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01937447488307953, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012817404232919216, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01299276202917099, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012454565614461899, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00877783540636301, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012817404232919216, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012817404232919216, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.57.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21744726598262787, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20424434542655945, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20012278854846954, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18159887194633484, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10262348502874374, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09782541543245316, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11432588845491409, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10549012571573257, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1041029691696167, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09223605692386627, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08754514902830124, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.058286331593990326, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0504646934568882, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04915958642959595, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04885522276163101, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029103389009833336, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025107163935899734, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02501971274614334, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022961782291531563, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022766409441828728, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015143631026148796, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01487799733877182, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014712660573422909, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009488599374890327, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015143631026148796, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015143631026148796, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.57.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2481222301721573, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21545866131782532, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20004630088806152, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17625688016414642, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1138523668050766, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09936961531639099, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14130274951457977, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1295398771762848, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11919943988323212, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0953921303153038, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0902983620762825, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07202193886041641, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06219318136572838, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05503135547041893, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05323617532849312, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.036205682903528214, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028831277042627335, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028143256902694702, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025172192603349686, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023962736129760742, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01933198794722557, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019088422879576683, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0170349832624197, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01290696207433939, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0170349832624197, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0170349832624197, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.58.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12316393107175827, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1152312308549881, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11207544803619385, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10145021975040436, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.057748496532440186, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0545586422085762, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06562244892120361, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.060696445405483246, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.058631375432014465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05177175626158714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.049110300838947296, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03332759067416191, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02899865247309208, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027677787467837334, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02735799364745617, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016672641038894653, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014171943068504333, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014045135118067265, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01294612791389227, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012744178995490074, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008696382865309715, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00857625249773264, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008271570317447186, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0054983580484986305, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016672641038894653, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016672641038894653, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.58.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09958913922309875, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09321700781583786, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0904737338423729, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08192898333072662, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04672286659479141, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04404876008629799, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05337787792086601, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04931728541851044, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04741222411394119, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04188433662056923, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.039723969995975494, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02709811180830002, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023562254384160042, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02239694632589817, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022112566977739334, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013558323495090008, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011491512879729271, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011372358538210392, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010500378906726837, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010323463007807732, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007084421347826719, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0069954534992575645, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006707859691232443, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0045116618275642395, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013558323495090008, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013558323495090008, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.58.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2404862642288208, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22546572983264923, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.220657080411911, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20006100833415985, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11315275728702545, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10760956257581711, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12640777230262756, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11660841852426529, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11475080251693726, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10156555473804474, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09631529450416565, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06431370973587036, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055692657828330994, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05411447584629059, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05374272167682648, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03208503872156143, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027519838884472847, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02739996276795864, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02511570230126381, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024872969835996628, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01652412675321102, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01616055518388748, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01592755690217018, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010037930682301521, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01652412675321102, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01592755690217018, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.58.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18923428654670715, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14905507862567902, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.13488154113292694, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.106031633913517, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08498932421207428, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06939063221216202, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10383692383766174, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09524136036634445, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09135529398918152, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06066839396953583, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.055468522012233734, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.053472742438316345, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04594078287482262, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.041285522282123566, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.040123749524354935, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02685767598450184, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02195092849433422, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.021706433966755867, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017338214442133904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01650306023657322, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01452220045030117, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014844425953924656, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013049856759607792, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010649891570210457, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017338214442133904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01650306023657322, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.58.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17505456507205963, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16435739398002625, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16097015142440796, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1460610330104828, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08273754268884659, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07880456000566483, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09220205992460251, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08503860980272293, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08391447365283966, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07429490238428116, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0705254077911377, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04708080738782883, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.040799569338560104, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03974165767431259, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03948749601840973, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023564007133245468, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020567983388900757, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020495371893048286, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0188601091504097, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018702128902077675, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01245356909930706, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012601290829479694, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012107598595321178, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008581963367760181, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01245356909930706, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01245356909930706, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.58.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20347391068935394, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19106721878051758, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.187223881483078, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16987861692905426, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09603015333414078, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09148859232664108, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10702849924564362, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09870119392871857, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09737516939640045, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0862567126750946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08191562443971634, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.054592911154031754, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0472383089363575, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.046020884066820145, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04574029520153999, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.027276642620563507, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.023545872420072556, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023465396836400032, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02153271622955799, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021349120885133743, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014239093288779259, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014014527201652527, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013834712095558643, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00902522075921297, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014239093288779259, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014239093288779259, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.58.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24087336659431458, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20778673887252808, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19263164699077606, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16907784342765808, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11054356396198273, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.096039317548275, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1379508227109909, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12561239302158356, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11596158146858215, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09185990691184998, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08694591373205185, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07046809047460556, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06053834781050682, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05367274954915047, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05195179209113121, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.035585708916187286, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028558779507875443, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027925584465265274, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024948319420218468, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02380937524139881, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019406426697969437, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019406214356422424, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017194543033838272, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013813300989568233, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017194543033838272, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017194543033838272, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.59.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11569864302873611, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10824338346719742, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10514848679304123, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09501991420984268, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05423090606927872, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.051139093935489655, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.061962224543094635, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.057134851813316345, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.055028174072504044, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04854360222816467, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04612931236624718, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0314733162522316, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02734716609120369, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026016613468527794, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02570267580449581, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015768442302942276, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013408404774963856, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01327160932123661, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012250793166458607, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012049481272697449, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008299516513943672, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008246011100709438, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007873110473155975, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00543284323066473, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015768442302942276, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015768442302942276, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.59.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09682721644639969, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09054607152938843, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08784028887748718, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07939153164625168, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.045392055064439774, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04271255433559418, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05220890790224075, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04815905541181564, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04606631398200989, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04065079987049103, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.038634900003671646, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026541046798229218, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023052068427205086, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021795596927404404, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021494455635547638, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013298739679157734, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011238154955208302, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011109787970781326, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010278810746967793, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01009051501750946, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007019367069005966, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006945634726434946, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006615875754505396, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004585629794746637, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013298739679157734, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013298739679157734, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.59.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22683724761009216, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21252289414405823, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2078956514596939, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1880713403224945, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10668045282363892, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10139872133731842, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11937937885522842, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1100502610206604, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1082085371017456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09555495530366898, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09065103530883789, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06070079281926155, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05257114768028259, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05103037506341934, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05065350979566574, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030294742435216904, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025967318564653397, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02584419958293438, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023665741086006165, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023427125066518784, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015630193054676056, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015290874987840652, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015065680257976055, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00953554455190897, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015630193054676056, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015065680257976055, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.59.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13468769192695618, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11863387376070023, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1110726147890091, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09100987762212753, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.062132179737091064, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05518430843949318, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07639435678720474, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06972122937440872, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06409534066915512, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05095560476183891, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04497849941253662, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03939450532197952, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03444672003388405, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03106197714805603, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03024318255484104, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.020247306674718857, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.017990069463849068, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.017678773030638695, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01610003039240837, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015624170191586018, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.011954233050346375, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013681513257324696, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.010981909930706024, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011435380205512047, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.017990069463849068, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015624170191586018, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.59.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16580288112163544, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15556976199150085, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15231984853744507, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13816696405410767, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07834380120038986, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07460073381662369, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08739157021045685, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08057878911495209, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0794660672545433, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07034194469451904, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06681445240974426, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.044701818376779556, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.038692399859428406, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03766815736889839, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03742231801152229, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022399311885237694, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019562436267733574, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01948944292962551, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01794983446598053, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017797935754060745, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011917142197489738, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012079591862857342, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011577284894883633, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008340854197740555, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01794983446598053, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017797935754060745, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.59.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18698565661907196, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1754467785358429, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17182481288909912, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1559155285358429, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0882704108953476, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0840686708688736, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09844738245010376, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09076645970344543, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08954005688428879, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07927391678094864, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0752759650349617, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.050284307450056076, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04348263144493103, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.042344652116298676, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.042077530175447464, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025155028328299522, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02175339125096798, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0216749869287014, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019909320399165154, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019733652472496033, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01322865765541792, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013082537800073624, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01284873392432928, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008602742105722427, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01322865765541792, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01322865765541792, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.59.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23169392347335815, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19850872457027435, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18395046889781952, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16112950444221497, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10622227936983109, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09200228005647659, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13213783502578735, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12028013914823532, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11161864548921585, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0877905786037445, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08299796283245087, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06781916320323944, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05822434276342392, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.051804300397634506, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05018891766667366, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.034400563687086105, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027920883148908615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027360649779438972, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024433504790067673, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023390665650367737, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01911357045173645, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01935943029820919, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01712585985660553, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014306440949440002, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01712585985660553, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01712585985660553, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.60.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11353226751089096, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10632357746362686, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1032402515411377, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09344504773616791, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05326911062002182, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.050256870687007904, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06109218671917915, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05622784048318863, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05404667183756828, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04776441305875778, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.045530788600444794, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.031062031164765358, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026909027248620987, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025567444041371346, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025247754529118538, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015578163787722588, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013171552680432796, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013038202188909054, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012055370956659317, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01185336709022522, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008219767361879349, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008097955957055092, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007801711093634367, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005327312741428614, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015578163787722588, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015578163787722588, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.60.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09217487275600433, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0863473042845726, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08361152559518814, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07563374936580658, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04322269558906555, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0406641811132431, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04989397153258324, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04600135236978531, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04386969283223152, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.038763709366321564, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.036841023713350296, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025340057909488678, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022001860663294792, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020753009244799614, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020455999299883842, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012700908817350864, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010701851919293404, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010565230622887611, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009796252474188805, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00960591807961464, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006713696289807558, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006624029483646154, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006314415950328112, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004356687888503075, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012700908817350864, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012700908817350864, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.60.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23261821269989014, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21810974180698395, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21347945928573608, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19323304295539856, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10943727940320969, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10410340130329132, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1222526803612709, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11276628822088242, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11102745682001114, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09812994301319122, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09300240129232407, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06217661499977112, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05389157682657242, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.052345797419548035, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05197726935148239, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03102291375398636, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026642529293894768, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0265355184674263, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024306366220116615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024073153734207153, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015974638983607292, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015689434483647346, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01539827510714531, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00978390034288168, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015974638983607292, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01539827510714531, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.60.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.15051224827766418, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1273220330476761, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11789394170045853, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09708603471517563, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06739640235900879, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05950259789824486, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08495884388685226, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07713377475738525, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.07217849791049957, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05466588959097862, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.049366164952516556, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04380977898836136, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03794058784842491, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03364531323313713, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03255051001906395, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02248971164226532, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01925302855670452, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.018958449363708496, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017052721232175827, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01641751453280449, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013289953581988811, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0145400520414114, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012079490348696709, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011937811970710754, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017052721232175827, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01641751453280449, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.60.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.13678398728370667, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1284160315990448, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.12577690184116364, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1140860989689827, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.06493190675973892, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0619039349257946, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0725104808807373, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.06685522943735123, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0658862367272377, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.05840642377734184, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.05561469495296478, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0374361015856266, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.032715000212192535, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03187280148267746, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.031677957624197006, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.018938729539513588, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.017617736011743546, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.017564821988344193, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01641242578625679, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.016300909221172333, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010777820833027363, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012258846312761307, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010523254051804543, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009920470416545868, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.017617736011743546, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.016300909221172333, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.60.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.14803238213062286, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.13886848092079163, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.13597887754440308, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.12340714782476425, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07027542591094971, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.06696858257055283, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.07856351137161255, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07228369265794754, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07129128277301788, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06319969147443771, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06025055795907974, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.040570273995399475, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03536438196897507, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0344671905040741, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03425852954387665, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02055579051375389, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01902596466243267, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.018977202475070953, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017732955515384674, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017616499215364456, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011761399917304516, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013218825682997704, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.0114801786839962, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01068806555122137, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017732955515384674, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017616499215364456, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.60.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.18871282041072845, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.16061557829380035, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1483663022518158, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.12948623299598694, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.08657529950141907, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.07457157224416733, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.10972224175930023, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.098201684653759, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.09078726172447205, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.07112192362546921, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.06762811541557312, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.05652506276965141, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.04813305661082268, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04284831881523132, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04151541367173195, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.029197948053479195, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.024075469002127647, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.023612286895513535, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.021313074976205826, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.020491909235715866, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01711278222501278, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017701003700494766, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015469490550458431, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014141887426376343, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01711278222501278, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015469490550458431, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.61.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10487393289804459, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0982346460223198, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09517446905374527, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08617954701185226, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04910210520029068, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04625311866402626, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05687929689884186, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.052215319126844406, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.049853742122650146, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04411550611257553, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04205607995390892, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028942907229065895, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024985868483781815, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023570820689201355, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0232315044850111, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01449948363006115, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012158120982348919, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012007597833871841, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0111390994861722, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01092058140784502, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00764203816652298, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007521830033510923, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007188559975475073, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00495566800236702, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01449948363006115, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01449948363006115, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } }, { "key": "model.layers.61.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08144155144691467, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07626143097877502, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07374418526887894, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06679636240005493, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.038146521896123886, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0358152836561203, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04445865750312805, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04083127900958061, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.03871448338031769, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.034265559166669846, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03260279819369316, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.022615553811192513, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.019558768719434738, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01833510771393776, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01804257370531559, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011351266875863075, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.00951163936406374, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009376836940646172, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008729927241802216, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008541660383343697, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00604962557554245, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005988921970129013, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005658622365444899, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004035535268485546, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01833510771393776, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01804257370531559, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.61.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21825166046619415, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20465058088302612, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2000945508480072, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1814698725938797, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10280527919530869, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09768635779619217, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11693164706230164, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10631364583969116, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1043112576007843, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09236033260822296, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08808884769678116, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05967090651392937, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05110219493508339, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.049420371651649475, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.048992518335580826, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030165232717990875, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025446973741054535, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02528700791299343, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023319056257605553, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02305310033261776, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015814658254384995, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01539827510714531, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015161647461354733, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010076367296278477, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015814658254384995, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015161647461354733, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.61.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13414737582206726, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1014789491891861, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08979827910661697, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07798602432012558, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06066613271832466, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04630056768655777, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07656248658895493, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06721880286931992, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06444711238145828, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.042239170521497726, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0415685661137104, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.039814069867134094, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.033598147332668304, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03065953217446804, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.029923196882009506, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.020915856584906578, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.018026258796453476, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.017860176041722298, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.015071635134518147, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01463593915104866, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012794667854905128, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013870038092136383, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.011970431543886662, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011807247065007687, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.018026258796453476, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.015071635134518147, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } } }, { "key": "model.layers.61.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.12721621990203857, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1193346157670021, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.11683904379606247, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.10593225806951523, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.05995689705014229, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.05710264667868614, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.06710349023342133, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.061700526624917984, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.060827795416116714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.053837407380342484, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.051213882863521576, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.034260060638189316, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.029596932232379913, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.028806665912270546, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.02862056903541088, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.017163949087262154, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01493468414992094, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.014880758710205555, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.013705633580684662, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.013589606620371342, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.009113121777772903, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.009195450693368912, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.008845151402056217, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0063110473565757275, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.017163949087262154, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01493468414992094, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.61.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.12285284698009491, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.11520491540431976, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.11279408633708954, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.10239346325397491, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.058250654488801956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.05548732727766037, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0653056874871254, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.05994793772697449, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.05910353735089302, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.05235067009925842, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.049854576587677, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.03371569141745567, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.029151184484362602, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.028387287631630898, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.028201108798384666, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.01704595796763897, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.015370654873549938, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0153187932446599, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.014252736233174801, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.014144199900329113, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.009628654457628727, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.010336418636143208, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.009394196793437004, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008058493956923485, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.01704595796763897, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, "best_option": { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0153187932446599, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } } }, { "key": "model.layers.61.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.078172966837883, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.0661793127655983, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.05981708690524101, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.052205294370651245, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.03595953807234764, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.030305687338113785, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.048587627708911896, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.042548149824142456, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.037824634462594986, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.02967616356909275, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.028499918058514595, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.024976443499326706, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.021219944581389427, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.01833559386432171, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.01761952042579651, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.01306186243891716, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.011093954555690289, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.01083013042807579, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.01009294018149376, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.009686361998319626, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.007914225570857525, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.00892744679003954, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0070151095278561115, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.007664437871426344, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ], "best_option_max": { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.01833559386432171, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, "best_option": { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.01761952042579651, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } } } ], "last_module_idx": 126, "base_perplexity": 12.4410466671353 }