File size: 1,238 Bytes
83d7ce2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
{
"ampere_pruning_method": "disabled",
"attention_block_cols": 32,
"attention_block_rows": 32,
"attention_lambda": 1.0,
"attention_output_with_dense": 0,
"attention_pruning_method": "sigmoied_threshold",
"bias_mask": true,
"dense_block_cols": 1,
"dense_block_rows": 1,
"dense_lambda": 1.0,
"dense_pruning_method": "sigmoied_threshold:1d_alt",
"distil_alpha_ce": 0.1,
"distil_alpha_teacher": 0.9,
"distil_teacher_name_or_path": "textattack/bert-base-uncased-SST-2",
"distil_temperature": 2.0,
"eval_with_current_patch_params": 1,
"final_ampere_temperature": 20.0,
"final_finetune": false,
"final_threshold": 0.1,
"final_warmup": 1,
"gelu_patch": false,
"gelu_patch_steps": 50000,
"initial_ampere_temperature": 0.0,
"initial_threshold": 0,
"initial_warmup": 1,
"layer_norm_patch": 0,
"layer_norm_patch_start_delta": 0.99,
"layer_norm_patch_steps": 4210,
"linear_min_parameters": 0,
"mask_init": "constant",
"mask_scale": 0.0,
"mask_scores_learning_rate": 0.01,
"qat": false,
"qconfig": "default",
"regularization": "l1",
"regularization_final_lambda": 3.0,
"rewind_model_name_or_path": null
} |