{
"config": {
"architecture": "union",
"configs": [
{
"alpha": 8,
"architecture": "lora",
"attn_matrices": [
"q",
"v"
],
"composition_mode": "add",
"dropout": 0.0,
"init_weights": "lora",
"intermediate_lora": false,
"leave_out": [],
"output_lora": false,
"r": 8,
"selfattn_lora": true,
"use_gating": true
},
{
"architecture": "prefix_tuning",
"bottleneck_size": 512,
"cross_prefix": true,
"dropout": 0.0,
"encoder_prefix": true,
"flat": false,
"leave_out": [],
"non_linearity": "tanh",
"prefix_length": 10,
"shared_gating": true,
"use_gating": true
},
{
"adapter_residual_before_ln": false,
"cross_adapter": false,
"factorized_phm_W": true,
"factorized_phm_rule": false,
"hypercomplex_nonlinearity": "glorot-uniform",
"init_weights": "bert",
"inv_adapter": null,
"inv_adapter_reduction_factor": null,
"is_parallel": false,
"learn_phm": true,
"leave_out": [],
"ln_after": false,
"ln_before": false,
"mh_adapter": false,
"non_linearity": "relu",
"original_ln_after": true,
"original_ln_before": true,
"output_adapter": true,
"phm_bias": true,
"phm_c_init": "normal",
"phm_dim": 4,
"phm_init_range": 0.0001,
"phm_layer": false,
"phm_rank": 1,
"reduction_factor": 16,
"residual_before_ln": true,
"scaling": 1.0,
"shared_W_phm": false,
"shared_phm_rule": true,
"use_gating": true
}
]
},
"hidden_size": 512,
"model_class": "T5AdapterModel",
"model_name": "t5-small",
"model_type": "t5",
"name": "xsum",
"version": "0.1.1"
}