{
  "activation": "gelu",
  "bias": false,
  "d_model": 2048,
  "dff": null,
  "dropout_rate": 0.0,
  "max_block_size": 1024,
  "n_heads_ra": 16,
  "n_heads_sa": 16,
  "n_layers": 24,
  "norm_first": true,
  "pos_enc_type": "RoPE",
  "ra_kwargs": {
    "n_kv_heads": 8,
    "n_relations": 64,
    "rel_activation": "identity",
    "rel_proj_dim": 16,
    "symmetric_rels": false
  },
  "ra_type": "relational_attention",
  "sa_kwargs": {
    "n_kv_heads": 8
  },
  "share_attn_params": false,
  "symbol_retrieval": "symbolic_attention",
  "symbol_retrieval_kwargs": {
    "d_model": 2048,
    "n_heads": 8,
    "n_symbols": 2048,
    "trainable_symbols": false
  },
  "symbol_retriever_config": {
    "shared_symbol_retriever": true,
    "weight_tie_symbol_library": false
  },
  "vocab_size": 50304
}