{
  "d_model": 768,
  "debug": true,
  "layer_norm_eps": 1e-05,
  "d_vocab": 50257,
  "init_range": 0.02,
  "n_ctx": 128,
  "d_head": 64,
  "dt_head": 64,
  "d_mlp": 2048,
  "causal_attn": true,
  "attn_type": "trittention",
  "n_heads": 12,
  "nt_heads": 2,
  "n_layers": 1,
  "dropout": 0.1,
  "mlp_type": "all",
  "with_ln": true,
  "is_gated": false,
  "has_mlp": true,
  "order_attn": true,
  "attn_eq": false,
  "window_size": 16,
  "look_backward": 1,
  "pad_value": 0,
  "autopad": true,
  "freqs_cis": null
}