test_stage:
  obcq_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      mappings:
      - - - re:.*q_proj
          - re:.*k_proj
          - re:.*v_proj
        - re:.*input_layernorm
      - - - re:.*gate_proj
          - re:.*up_proj
        - re:.*post_attention_layernorm
      - - - re:.*down_proj
        - re:.*up_proj
    QuantizationModifier:
      ignore:
      - LlamaRotaryEmbedding
      - LlamaRMSNorm
      - SiLUActivation
      - model.layers.1.mlp.down_proj
      - model.layers.30.mlp.down_proj
      - model.layers.0.mlp.down_proj
      post_oneshot_calibration: true
      scheme_overrides:
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
    SparseGPTModifier:
      sparsity: 0.0
      block_size: 128
      sequential_update: false
      quantize: true
      percdamp: 0.01
      mask_structure: "0:0"
      targets:
      - model.layers.0
      - model.layers.1
      - model.layers.2
      - model.layers.3
      - model.layers.4
      - model.layers.5
      - model.layers.6
      - model.layers.7
      - model.layers.8
      - model.layers.9
      - model.layers.10
      - model.layers.11
      - model.layers.12
      - model.layers.13
      - model.layers.14
      - model.layers.15
      - model.layers.16
      - model.layers.17
      - model.layers.18
      - model.layers.19
      - model.layers.20
      - model.layers.21
      - model.layers.22
      - model.layers.23
      - model.layers.24
      - model.layers.25
      - model.layers.26
      - model.layers.27
      - model.layers.28
      - model.layers.29
      - model.layers.30
      - model.layers.31
      - lm_head
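
# SmoothQuant (strength 0.8) first migrates activation outliers from the mapped
# projections into the preceding layernorm / up_proj weights; QuantizationModifier
# then sets up 8-bit schemes (per-channel Linear weights, 8-bit QK-matmul input,
# 8-bit asymmetric embedding weights) while skipping the ignored modules; and
# SparseGPTModifier, with sparsity 0.0 and quantize: true, runs the one-shot OBCQ
# pass over each listed decoder layer and lm_head without pruning any weights.
#
# A minimal sketch of applying a recipe like this, assuming SparseML's transformers
# one-shot entry point and a hypothetical model path / calibration dataset:
#
#   from sparseml.transformers import oneshot
#
#   oneshot(
#       model="path/to/llama-2-7b",       # hypothetical model path
#       dataset="open_platypus",          # hypothetical calibration dataset
#       recipe="recipe.yaml",             # this recipe file
#       max_seq_length=512,
#       num_calibration_samples=512,
#   )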