File size: 1,147 Bytes
52cccfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Recipe with a single top-level key (test_stage) holding one modifier group
# (obcq_modifiers). Three modifiers are configured, in this order:
# SmoothQuantModifier, QuantizationModifier, SparseGPTModifier.
test_stage:
  obcq_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      # Each entry is a two-element pair: a list of projection-layer patterns
      # followed by one layernorm pattern.
      # NOTE(review): the "re:" prefix presumably marks the remainder as a
      # regular expression -- confirm against the consuming tool.
      mappings:
        - - ["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"]
          - "re:.*input_layernorm"
        - - ["re:.*gate_proj", "re:.*up_proj"]
          - "re:.*post_attention_layernorm"
    QuantizationModifier:
      ignore:
        # Operations for which quantization does not make sense
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        # Leave the BMMs unquantized
        - QuantizableMatMul
        # Leave the layers with the most sensitive activations unquantized
        - model.layers.1.mlp.down_proj
        - model.layers.28.mlp.down_proj
        - model.layers.29.mlp.down_proj
        - model.layers.30.mlp.down_proj
        - model.layers.31.mlp.down_proj
      post_oneshot_calibration: true
      scheme_overrides:
        # Embedding override: input_activations is explicitly null; weights
        # are set to 8 bits with symmetric disabled.
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
    SparseGPTModifier:
      sparsity: 0.5
      block_size: 128
      sequential_update: true
      quantize: true
      percdamp: 0.01
      # Kept quoted so the value stays a string; a bare 0:0 can be read as a
      # sexagesimal integer by YAML 1.1 parsers.
      mask_structure: "0:0"
      # Inside double quotes "\\" is one literal backslash, so the loaded
      # pattern is: re:model.layers.\d*$
      targets:
        - "re:model.layers.\\d*$"