{ "activation": "gelu", "bias": false, "d_model": 1536, "dff": null, "dropout_rate": 0.0, "max_block_size": 1024, "n_heads": 24, "n_layers": 24, "norm_first": true, "pos_enc_type": "RoPE", "use_flash_attention": true, "vocab_size": 50304 }