{
  "n_layers": 6,
  "d_model": 128,
  "d_head": 64,
  "n_heads": 8,
  "d_mlp": 512,
  "d_vocab": 61,
  "n_ctx": 59,
  "act_fn": "gelu",
  "normalization_type": "LN",
  "att_only": false,
  "architecture": "mingpt"
}
{
  "n_layers": 6,
  "d_model": 128,
  "d_head": 64,
  "n_heads": 8,
  "d_mlp": 512,
  "d_vocab": 61,
  "n_ctx": 59,
  "act_fn": "gelu",
  "normalization_type": "LN",
  "att_only": false,
  "architecture": "mingpt"
}