{
"activation": "gelu",
"bias": false,
"d_model": 2048,
"dff": null,
"dropout_rate": 0.0,
"max_block_size": 1024,
"n_heads_ra": 16,
"n_heads_sa": 16,
"n_layers": 24,
"norm_first": true,
"pos_enc_type": "RoPE",
"ra_kwargs": {
"n_kv_heads": 8,
"n_relations": 128,
"rel_activation": "identity",
"rel_proj_dim": 8,
"symmetric_rels": false
},
"ra_type": "relational_attention",
"sa_kwargs": {
"n_kv_heads": 8
},
"share_attn_params": false,
"symbol_retrieval": "symbolic_attention",
"symbol_retrieval_kwargs": {
"d_model": 2048,
"n_heads": 16,
"n_symbols": 2048,
"trainable_symbols": false
},
"symbol_retriever_config": {
"shared_symbol_retriever": true,
"weight_tie_symbol_library": false
},
"vocab_size": 50304
}