test_stage:
  obcq_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      mappings: [
        [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
        [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"]
      ]
    QuantizationModifier:
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
      post_oneshot_calibration: True
      scheme_overrides:
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: False
    SparseGPTModifier:
      sparsity: 0.0
      block_size: 128
      sequential_update: False
      quantize: True
      percdamp: 0.01
      mask_structure: "0:0"
      targets: [
        "re:model.layers.\\d*$"
      ]
      target_ids: ["attention_mask", "position_ids"]
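
As a quick sanity check before launching a compression run, the recipe can be parsed with PyYAML to confirm that the three modifiers under `obcq_modifiers` are present with the keys shown above. The sketch below is a minimal example under the assumption that the recipe has been saved to a file named `recipe.yaml` (a hypothetical path); it only validates the structure and does not perform any smoothing, quantization, or pruning itself.

```python
# Minimal sketch: parse the recipe above and list its OBCQ modifiers.
# Assumes the YAML has been saved to "recipe.yaml" (hypothetical path);
# this only inspects structure, it does not run any compression.
import yaml

with open("recipe.yaml") as f:
    recipe = yaml.safe_load(f)

modifiers = recipe["test_stage"]["obcq_modifiers"]
for name, params in modifiers.items():
    print(f"{name}: {sorted(params)}")

# The recipe defines exactly these three modifiers.
assert set(modifiers) == {
    "SmoothQuantModifier",
    "QuantizationModifier",
    "SparseGPTModifier",
}
```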