{
  "attn_eq": true,
  "attn_type": "Trittention",
  "autopad": true,
  "causal_attn": true,
  "d_head": 64,
  "d_mlp": 2048,
  "d_model": 512,
  "d_vocab": 50304,
  "debug": true,
  "dropout": 0.1,
  "dt_head": 64,
  "has_mlp": true,
  "init_range": 0.01,
  "is_gated": false,
  "layer_norm_eps": 1e-05,
  "look_backward": 1,
  "mlp_type": "all",
  "n_ctx": 65,
  "n_heads": 8,
  "n_layers": 1,
  "nt_heads": 2,
  "order_attn": false,
  "pad_value": 0,
  "share_input_output_embed": true,
  "use_rotary": false,
  "window_size": 16,
  "with_ln": true
}