furnqse committed on
Commit
52cccfa
1 Parent(s): 6cbcc79

Upload recipe.yaml

Browse files
Files changed (1) hide show
  1. recipe.yaml +37 -0
recipe.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# One-shot compression recipe with a single stage (`test_stage`) that lists
# three modifiers: SmoothQuantModifier, QuantizationModifier, SparseGPTModifier.
# NOTE(review): nesting was reconstructed from a flattened diff view in which
# indentation was lost — confirm against the original recipe.yaml.
test_stage:
  obcq_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      # Each mapping entry pairs a list of layer-name regexes with one
      # layernorm regex.
      mappings: [
        [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
        [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
      ]
    QuantizationModifier:
      ignore:
        # These operations don't make sense to quantize
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        # Skip quantizing the BMMs
        - QuantizableMatMul
        # Skip quantizing the layers with the most sensitive activations
        - model.layers.1.mlp.down_proj
        - model.layers.28.mlp.down_proj
        - model.layers.29.mlp.down_proj
        - model.layers.30.mlp.down_proj
        - model.layers.31.mlp.down_proj
      post_oneshot_calibration: true
      scheme_overrides:
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
    SparseGPTModifier:
      sparsity: 0.5
      block_size: 128
      sequential_update: true
      quantize: true
      percdamp: 0.01
      # Quoted so the value stays a string ("0:0" would otherwise be read as
      # a sexagesimal integer by YAML 1.1 parsers).
      mask_structure: "0:0"
      targets: ["re:model.layers.\\d*$"]