trisol_O / config.json
sabalanya's picture
Push model using huggingface_hub.
84123e5 verified
{
"d_model": 768,
"debug": true,
"layer_norm_eps": 1e-05,
"d_vocab": 50257,
"init_range": 0.02,
"n_ctx": 128,
"d_head": 64,
"dt_head": 64,
"d_mlp": 2048,
"causal_attn": true,
"attn_type": "trittention",
"n_heads": 12,
"nt_heads": 2,
"n_layers": 1,
"dropout": 0.1,
"mlp_type": "all",
"with_ln": true,
"is_gated": false,
"has_mlp": true,
"order_attn": true,
"attn_eq": false,
"window_size": 16,
"look_backward": 1,
"pad_value": 0,
"autopad": true,
"freqs_cis": null
}