{ "activation": "gelu", "bias": false, "d_model": 2048, "dff": null, "dropout_rate": 0.0, "max_block_size": 1024, "n_heads_ra": 16, "n_heads_sa": 16, "n_layers": 24, "norm_first": true, "pos_enc_type": "RoPE", "ra_kwargs": { "n_kv_heads": 8, "n_relations": 64, "rel_activation": "identity", "rel_proj_dim": 16, "symmetric_rels": false }, "ra_type": "relational_attention", "sa_kwargs": { "n_kv_heads": 8 }, "share_attn_params": false, "symbol_retrieval": "symbolic_attention", "symbol_retrieval_kwargs": { "d_model": 2048, "n_heads": 8, "n_symbols": 2048, "trainable_symbols": false }, "symbol_retriever_config": { "shared_symbol_retriever": true, "weight_tie_symbol_library": false }, "vocab_size": 50304 }