{
  "activation": "gelu",
  "bias": false,
  "d_model": 1536,
  "dff": null,
  "dropout_rate": 0.0,
  "max_block_size": 1024,
  "n_heads": 24,
  "n_layers": 24,
  "norm_first": true,
  "pos_enc_type": "RoPE",
  "use_flash_attention": true,
  "vocab_size": 50304
}
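This reads as a decoder-only transformer configuration (GPT-style vocabulary of 50304, RoPE positional encoding, pre-norm blocks). Below is a minimal Python sketch of loading and sanity-checking it; the `ModelConfig` name and the convention that a null `dff` falls back to the common `4 * d_model` feed-forward width are assumptions for illustration, not something the config itself specifies.

```python
import json
from dataclasses import dataclass
from typing import Optional

# The config shown above, embedded so the sketch is self-contained.
CONFIG_JSON = """
{
  "activation": "gelu",
  "bias": false,
  "d_model": 1536,
  "dff": null,
  "dropout_rate": 0.0,
  "max_block_size": 1024,
  "n_heads": 24,
  "n_layers": 24,
  "norm_first": true,
  "pos_enc_type": "RoPE",
  "use_flash_attention": true,
  "vocab_size": 50304
}
"""

@dataclass
class ModelConfig:  # hypothetical name, for illustration only
    activation: str
    bias: bool
    d_model: int
    dff: Optional[int]
    dropout_rate: float
    max_block_size: int
    n_heads: int
    n_layers: int
    norm_first: bool
    pos_enc_type: str
    use_flash_attention: bool
    vocab_size: int

    def __post_init__(self) -> None:
        # d_model must split evenly across heads (here 1536 / 24 = 64 per head).
        assert self.d_model % self.n_heads == 0, "d_model must be divisible by n_heads"
        # Assumption: a null dff means the common 4 * d_model feed-forward default.
        if self.dff is None:
            self.dff = 4 * self.d_model

cfg = ModelConfig(**json.loads(CONFIG_JSON))
print(cfg.dff)                      # 6144 under the assumed 4 * d_model fallback
print(cfg.d_model // cfg.n_heads)   # 64-dimensional attention heads
```

Under these assumptions the config describes a 24-layer, 24-head model with 64-dimensional heads and a 6144-wide feed-forward layer, trained with no dropout and bias-free linear layers.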