test_stage:
  obcq_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      # Smooth activation outliers into the attention and MLP projection weights,
      # using the preceding layernorms as the smoothing anchors.
      mappings: [
        [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
        [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
      ]
    QuantizationModifier:
      # Keep rotary embeddings, RMSNorm, and SiLU activations in full precision.
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
      post_oneshot_calibration: True
      scheme_overrides:
        # Quantize embedding weights to 8 bits, asymmetric; skip their input activations.
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: False
    SparseGPTModifier:
      # sparsity 0.0: no pruning is applied; the OBCQ pass is used for quantization only.
      sparsity: 0.0
      block_size: 128
      sequential_update: False
      quantize: True
      percdamp: 0.01
      mask_structure: "0:0"
      targets: [
        "re:model.layers.\\d*$"
      ]
      target_ids: ["attention_mask", "position_ids"]
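
For context, a recipe like the one above is typically applied to a model in a single one-shot pass with SparseML. The following is a minimal sketch assuming SparseML's oneshot entrypoint; the base model stub, calibration dataset, and sample count are illustrative assumptions, not values taken from this repository.

    # Sketch: applying recipe.yaml with SparseML's one-shot entrypoint.
    # Model stub, dataset, and sample count below are assumed for illustration.
    from sparseml.transformers import SparseAutoModelForCausalLM, oneshot

    model = SparseAutoModelForCausalLM.from_pretrained(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # assumed base model
        device_map="auto",
    )

    oneshot(
        model=model,
        dataset="open_platypus",       # assumed calibration dataset
        recipe="recipe.yaml",          # the recipe shown above
        max_seq_length=512,
        num_calibration_samples=512,
        output_dir="./oneshot-output",
    )

After the run, the quantized model is written to output_dir and can be exported or evaluated like any other SparseML checkpoint.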