# OBCQ one-shot compression recipe: SmoothQuant + 8-bit quantization + SparseGPT
test_stage:
  obcq_modifiers:
    # Shift activation outlier scale from the q/k/v and MLP projections into the preceding norms
    SmoothQuantModifier:
      smoothing_strength: 0.8
      mappings: [
        [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
        [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
      ]
    # 8-bit quantization; rotary embeddings, RMSNorm, and SiLU activations are left unquantized,
    # and the Embedding module gets weight-only asymmetric 8-bit quantization
    QuantizationModifier:
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
      post_oneshot_calibration: True
      scheme_overrides:
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: False
    # SparseGPT in quantization-only mode (sparsity 0.0) applied to the decoder layers
    SparseGPTModifier:
      sparsity: 0.0
      block_size: 128
      sequential_update: False
      quantize: True
      percdamp: 0.01
      mask_structure: "0:0"
      targets: ["re:model.layers.\\d*$"]
      target_ids: ["attention_mask", "position_ids"]
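
# Usage note (a minimal sketch, not part of the recipe): a recipe like this is typically
# applied in one shot through SparseML's transformers entrypoint. The import path,
# argument names, model path, and dataset below are assumptions for illustration and
# may differ between SparseML versions.
#
#   from sparseml.transformers import oneshot   # assumed entrypoint
#
#   oneshot(
#       model="path/to/llama-model",             # hypothetical local model path
#       dataset="open_platypus",                 # hypothetical calibration dataset
#       recipe="recipe.yaml",                    # this recipe file
#       output_dir="./obcq-output",              # hypothetical output directory
#       num_calibration_samples=512,             # assumed argument name
#   )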