File size: 1,238 Bytes
83d7ce2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
{
    "ampere_pruning_method": "disabled",
    "attention_block_cols": 32,
    "attention_block_rows": 32,
    "attention_lambda": 1.0,
    "attention_output_with_dense": 0,
    "attention_pruning_method": "sigmoied_threshold",
    "bias_mask": true,
    "dense_block_cols": 1,
    "dense_block_rows": 1,
    "dense_lambda": 1.0,
    "dense_pruning_method": "sigmoied_threshold:1d_alt",
    "distil_alpha_ce": 0.1,
    "distil_alpha_teacher": 0.9,
    "distil_teacher_name_or_path": "textattack/bert-base-uncased-SST-2",
    "distil_temperature": 2.0,
    "eval_with_current_patch_params": 1,
    "final_ampere_temperature": 20.0,
    "final_finetune": false,
    "final_threshold": 0.1,
    "final_warmup": 1,
    "gelu_patch": false,
    "gelu_patch_steps": 50000,
    "initial_ampere_temperature": 0.0,
    "initial_threshold": 0,
    "initial_warmup": 1,
    "layer_norm_patch": 0,
    "layer_norm_patch_start_delta": 0.99,
    "layer_norm_patch_steps": 4210,
    "linear_min_parameters": 0,
    "mask_init": "constant",
    "mask_scale": 0.0,
    "mask_scores_learning_rate": 0.01,
    "qat": false,
    "qconfig": "default",
    "regularization": "l1",
    "regularization_final_lambda": 3.0,
    "rewind_model_name_or_path": null
}