{ "block_size": "8_8_8", "d_embed": 1024, "d_head": 64, "d_inner": 4096, "d_model": 1024, "dropact": 0.0, "dropatt": 0.1, "dropout": 0.1, "n_head": 16, "pool_q_only": true, "pooling_size": 2, "pooling_type": "mean", "separate_cls": true, "vocab_size": 50000 }