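# SparseML OBCQ recipe: one-shot activation equalization, INT8 quantization,
# and 50% SparseGPT pruning for a Llama-architecture model. The file defines
# two identical stages (test_stage_0 and test_stage_1), apparently to exercise
# multi-stage recipe application.
#
# Usage sketch (assumes a SparseML release exposing the `oneshot` entrypoint;
# the model and dataset arguments below are placeholders):
#   from sparseml.transformers import oneshot
#   oneshot(model="<model_path>", dataset="<calibration_dataset>",
#           recipe="recipe.yaml")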
test_stage_0:
  obcq_modifiers:
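    # SmoothQuant-style activation equalization. Each mapping pairs the
    # projections whose inputs are balanced (inner list) with the preceding
    # norm that the smoothing scales are folded into; the logarithmic variant
    # computes those scales on a log scale rather than linearly.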
    LogarithmicEqualizationModifier:
      mappings:
        - - - re:.*q_proj
            - re:.*k_proj
            - re:.*v_proj
          - re:.*input_layernorm
        - - - re:.*gate_proj
            - re:.*up_proj
          - re:.*post_attention_layernorm
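    # INT8 quantization. The `ignore` list skips module types that tend to be
    # quantization-sensitive (norms, rotary embeddings, SiLU, attention matmul
    # outputs) plus a handful of individually sensitive layers, and
    # `scheme_overrides` then tunes the scheme per module type.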
    QuantizationModifier:
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        - MatMulOutput_QK
        - MatMulOutput_PV
        - model.layers.21.mlp.down_proj
        - model.layers.7.mlp.down_proj
        - model.layers.2.mlp.down_proj
        - model.layers.8.self_attn.q_proj
        - model.layers.8.self_attn.k_proj
      post_oneshot_calibration: true
      scheme_overrides:
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        MatMulLeftInput_PV:
          input_activations:
            num_bits: 8
            symmetric: true
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
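    # One-shot SparseGPT pruning to 50% sparsity; mask_structure "0:0" means
    # unstructured. sequential_update prunes the targeted layers one at a
    # time, quantize: true folds the quantization defined above into the same
    # OBCQ pass, and percdamp dampens the Hessian for numerical stability.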
    SparseGPTModifier:
      sparsity: 0.5
      block_size: 128
      sequential_update: true
      quantize: true
      percdamp: 0.01
      mask_structure: "0:0"
      targets:
        - re:model.layers.\d*$
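
# Second stage: identical to test_stage_0, presumably to verify that the same
# one-shot recipe composes across multiple stages.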
test_stage_1:
  obcq_modifiers:
    LogarithmicEqualizationModifier:
      mappings:
        - - - re:.*q_proj
            - re:.*k_proj
            - re:.*v_proj
          - re:.*input_layernorm
        - - - re:.*gate_proj
            - re:.*up_proj
          - re:.*post_attention_layernorm
    QuantizationModifier:
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        - MatMulOutput_QK
        - MatMulOutput_PV
        - model.layers.21.mlp.down_proj
        - model.layers.7.mlp.down_proj
        - model.layers.2.mlp.down_proj
        - model.layers.8.self_attn.q_proj
        - model.layers.8.self_attn.k_proj
      post_oneshot_calibration: true
      scheme_overrides:
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        MatMulLeftInput_PV:
          input_activations:
            num_bits: 8
            symmetric: true
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
    SparseGPTModifier:
      sparsity: 0.5
      block_size: 128
      sequential_update: true
      quantize: true
      percdamp: 0.01
      mask_structure: "0:0"
      targets:
        - re:model.layers.\d*$