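# One-shot compression recipe (llm-compressor / SparseML YAML format):
# prune to 2:4 semi-structured sparsity, quantize Linear layers to FP8,
# and hold the resulting sparsity mask constant during any later training.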
DEFAULT_stage:
  DEFAULT_modifiers:
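    # SparseGPT one-shot pruning: 50% sparsity in a 2:4 pattern, applied
    # layer by layer (sequential_update) to the matched decoder layers.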
    SparseGPTModifier:
      sparsity: 0.5
      mask_structure: '2:4'
      sequential_update: true
      targets: ['re:model.layers.\d*$']
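    # FP8 quantization of all Linear modules except the output head;
    # FP8_DYNAMIC is the preset for per-channel FP8 weights with
    # dynamic per-token FP8 activations.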
    QuantizationModifier:
      ignore: [lm_head]
      targets: [Linear]
      scheme: FP8_DYNAMIC
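    # Keep the 2:4 mask fixed on the attention and MLP projection weights
    # from the start of training, so fine-tuning does not regrow pruned weights.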
    ConstantPruningModifier:
      start: 0.0
      targets: ['re:.*q_proj.weight', 're:.*k_proj.weight', 're:.*v_proj.weight', 're:.*o_proj.weight',
        're:.*gate_proj.weight', 're:.*up_proj.weight', 're:.*down_proj.weight']