File size: 1,302 Bytes
5844bf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
{
  "bits": 4,
  "dataset": "wikitext2",
  "group_size": 128,
  "damp_percent": 0.1,
  "desc_act": false,
  "sym": true,
  "true_sequential": true,
  "quant_method": "gptq",
  "modules_in_block_to_quantize": [
    [
      "self_attn.k_proj",
      "self_attn.v_proj",
      "self_attn.q_proj"
    ],
    [
      "self_attn.o_proj"
    ],
    [
      "block_sparse_moe.experts.0.w1",
      "block_sparse_moe.experts.1.w1",
      "block_sparse_moe.experts.2.w1",
      "block_sparse_moe.experts.3.w1",
      "block_sparse_moe.experts.4.w1",
      "block_sparse_moe.experts.5.w1",
      "block_sparse_moe.experts.6.w1",
      "block_sparse_moe.experts.7.w1",
      "block_sparse_moe.experts.0.w3",
      "block_sparse_moe.experts.1.w3",
      "block_sparse_moe.experts.2.w3",
      "block_sparse_moe.experts.3.w3",
      "block_sparse_moe.experts.4.w3",
      "block_sparse_moe.experts.5.w3",
      "block_sparse_moe.experts.6.w3",
      "block_sparse_moe.experts.7.w3"
    ],
    [
      "block_sparse_moe.experts.0.w2",
      "block_sparse_moe.experts.1.w2",
      "block_sparse_moe.experts.2.w2",
      "block_sparse_moe.experts.3.w2",
      "block_sparse_moe.experts.4.w2",
      "block_sparse_moe.experts.5.w2",
      "block_sparse_moe.experts.6.w2",
      "block_sparse_moe.experts.7.w2"
    ]
  ]
}