# mwitiderrick's picture
# Create quantize.yaml
# 6f7231f verified
# Quantization recipe: one stage (`test_stage`) holding one modifier group
# (`obcq_modifiers`) with two modifiers.
# NOTE(review): the nesting below was reconstructed from the key names after
# the original indentation was lost -- confirm against the consuming tool's
# recipe schema before relying on it.
test_stage:
  obcq_modifiers:
    LogarithmicEqualizationModifier:
      # Each entry pairs a list of projection-layer name patterns with one
      # layernorm name pattern ("re:" presumably marks a regex -- confirm).
      mappings:
        - [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"]
        - [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
    QuantizationModifier:
      ignore:
        # These operations don't make sense to quantize
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        - MatMulOutput_QK
        - MatMulOutput_PV
        # Skip quantizing the layers with the most sensitive activations
        - model.layers.1.mlp.down_proj
        - model.layers.30.mlp.down_proj
        - model.layers.31.mlp.down_proj
        - model.layers.28.mlp.down_proj
        - model.layers.29.mlp.down_proj
      post_oneshot_calibration: true
      # Per-operation overrides of the quantization scheme.
      scheme_overrides:
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        MatMulLeftInput_PV:
          input_activations:
            num_bits: 8
            symmetric: true
        Embedding:
          # Explicit null: no input-activation scheme is set for Embedding;
          # only its weights are configured (8-bit, asymmetric).
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false