File size: 1,302 Bytes
5844bf3 |
{
"bits": 4,
"dataset": "wikitext2",
"group_size": 128,
"damp_percent": 0.1,
"desc_act": false,
"sym": true,
"true_sequential": true,
"quant_method": "gptq",
"modules_in_block_to_quantize": [
[
"self_attn.k_proj",
"self_attn.v_proj",
"self_attn.q_proj"
],
[
"self_attn.o_proj"
],
[
"block_sparse_moe.experts.0.w1",
"block_sparse_moe.experts.1.w1",
"block_sparse_moe.experts.2.w1",
"block_sparse_moe.experts.3.w1",
"block_sparse_moe.experts.4.w1",
"block_sparse_moe.experts.5.w1",
"block_sparse_moe.experts.6.w1",
"block_sparse_moe.experts.7.w1",
"block_sparse_moe.experts.0.w3",
"block_sparse_moe.experts.1.w3",
"block_sparse_moe.experts.2.w3",
"block_sparse_moe.experts.3.w3",
"block_sparse_moe.experts.4.w3",
"block_sparse_moe.experts.5.w3",
"block_sparse_moe.experts.6.w3",
"block_sparse_moe.experts.7.w3"
],
[
"block_sparse_moe.experts.0.w2",
"block_sparse_moe.experts.1.w2",
"block_sparse_moe.experts.2.w2",
"block_sparse_moe.experts.3.w2",
"block_sparse_moe.experts.4.w2",
"block_sparse_moe.experts.5.w2",
"block_sparse_moe.experts.6.w2",
"block_sparse_moe.experts.7.w2"
]
]
}