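# SparseML one-shot compression recipe (OBCQ) for a Llama-style model:
# SmoothQuant activation smoothing, INT8 quantization, and a SparseGPT pass.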
test_stage:
  obcq_modifiers:
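    # SmoothQuant migrates activation outliers into the weights ahead of
    # quantization; smoothing_strength is the migration factor (alpha).
    # Each mapping pairs the projections to be balanced with the preceding
    # norm layer whose scales absorb the adjustment.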
    SmoothQuantModifier:
      smoothing_strength: 0.8
      mappings: [
        [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
        [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
      ]
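    # INT8 quantization of the remaining modules. Module types under `ignore`
    # stay at full precision; `post_oneshot_calibration` re-calibrates the
    # quantization observers after the one-shot pass; embeddings are
    # overridden to weight-only, asymmetric 8-bit quantization.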
    QuantizationModifier:
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
      post_oneshot_calibration: True
      scheme_overrides:
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: False
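    # SparseGPT pass over the decoder layers. With sparsity 0.0 and
    # quantize: True, no weights are pruned and the pass only applies its
    # GPTQ-style quantization error compensation. mask_structure "0:0" means
    # unstructured (no N:M pattern); percdamp is the fraction of the mean
    # Hessian diagonal added as dampening; target_ids lists extra forward-pass
    # inputs propagated to each targeted layer during calibration.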
    SparseGPTModifier:
      sparsity: 0.0
      block_size: 128
      sequential_update: False
      quantize: True
      percdamp: 0.01
      mask_structure: "0:0"
      targets: [
        "re:model.layers.\\d*$"
      ]
      target_ids: ["attention_mask", "position_ids"]
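
# Minimal application sketch (assumed API: `oneshot` and its arguments follow
# SparseML's OBCQ examples and may differ between versions):
#
#   from sparseml.transformers import oneshot
#
#   oneshot(
#       model="path/to/llama-model",   # hypothetical local path or HF model id
#       dataset="open_platypus",       # calibration data used by the modifiers
#       recipe="recipe.yaml",          # this file
#       output_dir="./obcq-output",
#   )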