Text Generation
Transformers
Safetensors
English
mixtral
conversational
text-generation-inference
4-bit precision
TheBloke committed on
Commit
21c79ed
1 Parent(s): 7046d91

Update config.json for Transformers GPTQ support

Browse files
Files changed (1) hide show
  1. config.json +13 -1
config.json CHANGED
@@ -31,6 +31,18 @@
31
  "vocab_size": 32002,
32
  "quantization_config": {
33
  "bits": 4,
 
 
 
 
 
 
 
 
 
 
 
 
34
  "group_size": -1,
35
  "damp_percent": 0.1,
36
  "desc_act": true,
@@ -40,4 +52,4 @@
40
  "model_file_base_name": "model",
41
  "quant_method": "gptq"
42
  }
43
- }
 
31
  "vocab_size": 32002,
32
  "quantization_config": {
33
  "bits": 4,
34
+ "modules_in_block_to_quantize" : [
35
+ ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
36
+ ["self_attn.o_proj"],
37
+ ["block_sparse_moe.experts.0.w1", "block_sparse_moe.experts.1.w1", "block_sparse_moe.experts.2.w1",
38
+ "block_sparse_moe.experts.3.w1", "block_sparse_moe.experts.4.w1", "block_sparse_moe.experts.5.w1",
39
+ "block_sparse_moe.experts.6.w1", "block_sparse_moe.experts.7.w1", "block_sparse_moe.experts.0.w3",
40
+ "block_sparse_moe.experts.1.w3", "block_sparse_moe.experts.2.w3", "block_sparse_moe.experts.3.w3",
41
+ "block_sparse_moe.experts.4.w3", "block_sparse_moe.experts.5.w3", "block_sparse_moe.experts.6.w3",
42
+ "block_sparse_moe.experts.7.w3" ],
43
+ ["block_sparse_moe.experts.0.w2", "block_sparse_moe.experts.1.w2", "block_sparse_moe.experts.2.w2",
44
+ "block_sparse_moe.experts.3.w2", "block_sparse_moe.experts.4.w2", "block_sparse_moe.experts.5.w2",
45
+ "block_sparse_moe.experts.6.w2", "block_sparse_moe.experts.7.w2" ] ],
46
  "group_size": -1,
47
  "damp_percent": 0.1,
48
  "desc_act": true,
 
52
  "model_file_base_name": "model",
53
  "quant_method": "gptq"
54
  }
55
+ }