---
# One-shot OBCQ recipe: quantize + 50% prune a Mistral model.
test_stage:
  obcq_modifiers:
    QuantizationModifier:
      ignore:
        # These operations don't make sense to quantize
        - MistralRotaryEmbedding
        - MistralRMSNorm
        - SiLUActivation
        # Skip quantizing the BMMs
        # - QuantizableMatMul
        # Skip quantizing the layers with the most sensitive activations
        - model.layers.1.mlp.down_proj
        - model.layers.31.mlp.down_proj
        - model.layers.30.mlp.down_proj
        - model.layers.30.mlp.gate_proj
        - model.layers.30.mlp.up_proj
      # Re-run calibration after the one-shot pass so observers see final weights
      post_oneshot_calibration: true
      scheme_overrides:
        # Embeddings: quantize weights only (asymmetric int8), leave inputs alone
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
    SparseGPTModifier:
      # Target 50% unstructured sparsity ("0:0" = no N:M mask constraint)
      sparsity: 0.5
      block_size: 128
      # Prune layers one at a time to bound memory use
      sequential_update: true
      quantize: true
      # Hessian dampening fraction for the SparseGPT solve
      percdamp: 0.01
      mask_structure: "0:0"
      # Apply to every decoder layer module
      targets: ["re:model.layers.\\d*$"]