{
  "activation": "silu",
  "bias": false,
  "ckpt_iter": 10,
  "d_model": 128,
  "dropout": 0.2,
  "hidden_dim": 128,
  "mlp": "GLU",
  "num_heads": 4,
  "num_kv_heads": 0,
  "num_layers": 4,
  "seq_len": 10,
  "vocab_size": 10,
  "weight_tying": false
}
{
  "activation": "silu",
  "bias": false,
  "ckpt_iter": 10,
  "d_model": 128,
  "dropout": 0.2,
  "hidden_dim": 128,
  "mlp": "GLU",
  "num_heads": 4,
  "num_kv_heads": 0,
  "num_layers": 4,
  "seq_len": 10,
  "vocab_size": 10,
  "weight_tying": false
}